friendship ended with social-app. php is my new best friend
1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\CssSelector\Parser;
13
14use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
15use Symfony\Component\CssSelector\Node;
16use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
17
18/**
19 * CSS selector parser.
20 *
21 * This component is a port of the Python cssselect library,
22 * which is copyright Ian Bicking, @see https://github.com/scrapy/cssselect.
23 *
24 * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
25 *
26 * @internal
27 */
class Parser implements ParserInterface
{
    /** Tokenizer used to turn the raw selector source into a token stream. */
    private Tokenizer $tokenizer;

    /**
     * @param Tokenizer|null $tokenizer tokenizer to use; a new Tokenizer is created when null is given
     */
    public function __construct(?Tokenizer $tokenizer = null)
    {
        $this->tokenizer = $tokenizer ?? new Tokenizer();
    }

    /**
     * Tokenizes the given source and parses it as a comma-separated selector list.
     *
     * @return Node\SelectorNode[]
     */
    public function parse(string $source): array
    {
        $reader = new Reader($source);
        $stream = $this->tokenizer->tokenize($reader);

        return $this->parseSelectorList($stream);
    }

    /**
     * Parses the arguments for ":nth-child()" and friends.
     *
     * The token values are concatenated and read as an "an+b" series:
     * "odd" gives [2, 1], "even" gives [2, 0], "n" gives [1, 0], a value
     * without "n" gives [0, value], anything else is split around the first "n".
     *
     * @param Token[] $tokens
     *
     * @return int[] the [a, b] pair of the series
     *
     * @throws SyntaxErrorException when a token is a string or a coefficient is not numeric
     */
    public static function parseSeries(array $tokens): array
    {
        foreach ($tokens as $token) {
            if ($token->isString()) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }
        }

        $joined = trim(implode('', array_map(static fn (Token $token) => $token->getValue(), $tokens)));

        // Converts one coefficient to an integer, rejecting non-numeric text.
        $int = static function ($string): int {
            if (!is_numeric($string)) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }

            return (int) $string;
        };

        if ('odd' === $joined) {
            return [2, 1];
        }

        if ('even' === $joined) {
            return [2, 0];
        }

        if ('n' === $joined) {
            return [1, 0];
        }

        if (!str_contains($joined, 'n')) {
            return [0, $int($joined)];
        }

        // $joined contains at least one "n" here, so a limit of 2 always yields
        // exactly two parts. Anything after a second "n" stays in $b and is
        // rejected by $int() instead of being silently discarded.
        [$a, $b] = explode('n', $joined, 2);

        // Compare against '' explicitly: the string '0' is falsy in PHP, so a
        // truthiness test would turn "0n+5" into [1, 5] instead of [0, 5].
        if ('' === $a) {
            $step = 1;
        } elseif ('-' === $a || '+' === $a) {
            // A bare sign stands for -1 / +1.
            $step = $int($a.'1');
        } else {
            $step = $int($a);
        }

        $offset = '' !== $b ? $int($b) : 0;

        return [$step, $offset];
    }

    /**
     * Parses a comma-separated list of selectors.
     *
     * When $isArgument is true the list is being read inside a functional
     * pseudo-class: parsing stops in front of the closing ")" (which is left
     * in the stream for the caller) and the list may be empty.
     *
     * @return Node\SelectorNode[]
     */
    private function parseSelectorList(TokenStream $stream, bool $isArgument = false): array
    {
        $stream->skipWhitespace();
        $selectors = [];

        while (true) {
            if ($isArgument && $stream->getPeek()->isDelimiter([')'])) {
                break;
            }

            $selectors[] = $this->parserSelectorNode($stream, $isArgument);

            if (!$stream->getPeek()->isDelimiter([','])) {
                break;
            }

            // Consume the "," and any whitespace before the next selector.
            $stream->getNext();
            $stream->skipWhitespace();
        }

        return $selectors;
    }

    /**
     * Parses one selector: a chain of simple selectors joined by combinators.
     *
     * Parsing stops at the end of the input, at a "," or, when $isArgument is
     * true, at a ")".
     *
     * @throws SyntaxErrorException when a pseudo-element is followed by more selector parts
     */
    private function parserSelectorNode(TokenStream $stream, bool $isArgument = false): Node\SelectorNode
    {
        [$result, $pseudoElement] = $this->parseSimpleSelector($stream, false, $isArgument);

        while (true) {
            $stream->skipWhitespace();
            $peek = $stream->getPeek();

            if (
                $peek->isFileEnd()
                || $peek->isDelimiter([','])
                || ($isArgument && $peek->isDelimiter([')']))
            ) {
                break;
            }

            // More tokens follow, so an already-seen pseudo-element is misplaced.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isDelimiter(['+', '>', '~'])) {
                $combinator = $stream->getNext()->getValue();
                $stream->skipWhitespace();
            } else {
                // No explicit combinator: the whitespace skipped above acts as the descendant combinator.
                $combinator = ' ';
            }

            [$nextSelector, $pseudoElement] = $this->parseSimpleSelector($stream, false, $isArgument);
            $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
        }

        return new Node\SelectorNode($result, $pseudoElement);
    }

    /**
     * Parses next simple node (hash, class, pseudo, negation).
     *
     * @param bool $insideNegation true when parsing the argument of ":not()"; a nested ":not()" is then rejected
     * @param bool $isArgument     true when parsing inside a functional pseudo-class, so that ")" ends the selector
     *
     * @return array a two-element list: the resulting node and the pseudo-element name (or null)
     *
     * @throws SyntaxErrorException
     */
    private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = false, bool $isArgument = false): array
    {
        $stream->skipWhitespace();

        // Remember how many tokens were consumed so an empty selector can be detected at the end.
        $selectorStart = \count($stream->getUsed());
        $result = $this->parseElementNode($stream);
        $pseudoElement = null;

        while (true) {
            $peek = $stream->getPeek();
            if ($peek->isWhitespace()
                || $peek->isFileEnd()
                || $peek->isDelimiter([',', '+', '>', '~'])
                || ($isArgument && $peek->isDelimiter([')']))
            ) {
                break;
            }

            // Anything after a pseudo-element within the same simple selector is an error.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isHash()) {
                $result = new Node\HashNode($result, $stream->getNext()->getValue());
            } elseif ($peek->isDelimiter(['.'])) {
                $stream->getNext();
                $result = new Node\ClassNode($result, $stream->getNextIdentifier());
            } elseif ($peek->isDelimiter(['['])) {
                $stream->getNext();
                $result = $this->parseAttributeNode($result, $stream);
            } elseif ($peek->isDelimiter([':'])) {
                $stream->getNext();

                // "::" introduces a pseudo-element.
                if ($stream->getPeek()->isDelimiter([':'])) {
                    $stream->getNext();
                    $pseudoElement = $stream->getNextIdentifier();

                    continue;
                }

                $identifier = $stream->getNextIdentifier();
                $lowered = strtolower($identifier);

                if (\in_array($lowered, ['first-line', 'first-letter', 'before', 'after'], true)) {
                    // Special case: CSS 2.1 pseudo-elements can have a single ':'.
                    // Any new pseudo-element must have two.
                    $pseudoElement = $identifier;

                    continue;
                }

                if (!$stream->getPeek()->isDelimiter(['('])) {
                    // Plain (non-functional) pseudo-class.
                    $result = new Node\PseudoNode($result, $identifier);
                    if ('Pseudo[Element[*]:scope]' === $result->__toString()) {
                        // ":" and "scope" are the last two consumed tokens, so ":scope" is
                        // accepted only when nothing precedes them, when only a leading
                        // whitespace token does, or when they follow a "," (optionally
                        // separated from it by one whitespace token).
                        // NOTE(review): relies on getUsed() listing consumed tokens in order - confirm.
                        $used = \count($stream->getUsed());
                        if (!(2 === $used
                           || 3 === $used && $stream->getUsed()[0]->isWhiteSpace()
                           || $used >= 3 && $stream->getUsed()[$used - 3]->isDelimiter([','])
                           || $used >= 4
                                && $stream->getUsed()[$used - 3]->isWhiteSpace()
                                && $stream->getUsed()[$used - 4]->isDelimiter([','])
                        )) {
                            throw SyntaxErrorException::notAtTheStartOfASelector('scope');
                        }
                    }
                    continue;
                }

                // Functional pseudo-class: consume "(" and any leading whitespace.
                $stream->getNext();
                $stream->skipWhitespace();

                if ('not' === $lowered) {
                    if ($insideNegation) {
                        throw SyntaxErrorException::nestedNot();
                    }

                    [$argument, $argumentPseudoElement] = $this->parseSimpleSelector($stream, true, true);
                    $next = $stream->getNext();

                    if (null !== $argumentPseudoElement) {
                        throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
                    }

                    if (!$next->isDelimiter([')'])) {
                        throw SyntaxErrorException::unexpectedToken('")"', $next);
                    }

                    $result = new Node\NegationNode($result, $argument);
                } elseif ('is' === $lowered || 'where' === $lowered) {
                    // ":is()" and ":where()" both take a selector list; only the node type differs.
                    $selectors = $this->parseSelectorList($stream, true);

                    $next = $stream->getNext();
                    if (!$next->isDelimiter([')'])) {
                        throw SyntaxErrorException::unexpectedToken('")"', $next);
                    }

                    $result = 'is' === $lowered
                        ? new Node\MatchingNode($result, $selectors)
                        : new Node\SpecificityAdjustmentNode($result, $selectors);
                } else {
                    // Generic function: collect raw argument tokens up to the closing ")".
                    $arguments = [];
                    $next = null;

                    while (true) {
                        $stream->skipWhitespace();
                        $next = $stream->getNext();

                        if ($next->isIdentifier()
                            || $next->isString()
                            || $next->isNumber()
                            || $next->isDelimiter(['+', '-'])
                        ) {
                            $arguments[] = $next;
                        } elseif ($next->isDelimiter([')'])) {
                            break;
                        } else {
                            throw SyntaxErrorException::unexpectedToken('an argument', $next);
                        }
                    }

                    if (!$arguments) {
                        throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
                    }

                    $result = new Node\FunctionNode($result, $identifier, $arguments);
                }
            } else {
                throw SyntaxErrorException::unexpectedToken('selector', $peek);
            }
        }

        // No token was consumed at all: there is no selector here.
        if (\count($stream->getUsed()) === $selectorStart) {
            throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
        }

        return [$result, $pseudoElement];
    }

    /**
     * Parses the optional leading "namespace|element" part of a simple selector.
     *
     * When the next token is neither an identifier nor "*", nothing is consumed
     * and an element node with null namespace and null element is returned.
     */
    private function parseElementNode(TokenStream $stream): Node\ElementNode
    {
        $peek = $stream->getPeek();

        if ($peek->isIdentifier() || $peek->isDelimiter(['*'])) {
            // Provisionally treat the first token as the namespace; "*" is recorded as null.
            if ($peek->isIdentifier()) {
                $namespace = $stream->getNext()->getValue();
            } else {
                $stream->getNext();
                $namespace = null;
            }

            if ($stream->getPeek()->isDelimiter(['|'])) {
                $stream->getNext();
                $element = $stream->getNextIdentifierOrStar();
            } else {
                // No "|" follows: the first token was the element name, not a namespace.
                $element = $namespace;
                $namespace = null;
            }
        } else {
            $element = $namespace = null;
        }

        return new Node\ElementNode($namespace, $element);
    }

    /**
     * Parses an attribute selector; the opening "[" has already been consumed by the caller.
     *
     * @param Node\NodeInterface $selector node the attribute condition applies to
     *
     * @throws SyntaxErrorException when the operator, the value or the closing "]" is missing or invalid
     */
    private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream): Node\AttributeNode
    {
        $stream->skipWhitespace();
        $attribute = $stream->getNextIdentifierOrStar();

        // A null attribute (presumably a "*" - confirm against TokenStream) is only
        // accepted when a "|" follows it.
        if (null === $attribute && !$stream->getPeek()->isDelimiter(['|'])) {
            throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
        }

        if ($stream->getPeek()->isDelimiter(['|'])) {
            $stream->getNext();

            if ($stream->getPeek()->isDelimiter(['='])) {
                // "|" directly followed by "=" is the "|=" operator, not a namespace separator.
                $namespace = null;
                $stream->getNext();
                $operator = '|=';
            } else {
                $namespace = $attribute;
                $attribute = $stream->getNextIdentifier();
                $operator = null;
            }
        } else {
            $namespace = $operator = null;
        }

        if (null === $operator) {
            $stream->skipWhitespace();
            $next = $stream->getNext();

            if ($next->isDelimiter([']'])) {
                // "[attr]" with no operator: plain existence test.
                return new Node\AttributeNode($selector, $namespace, $attribute, 'exists', null);
            } elseif ($next->isDelimiter(['='])) {
                $operator = '=';
            } elseif ($next->isDelimiter(['^', '$', '*', '~', '|', '!'])
                && $stream->getPeek()->isDelimiter(['='])
            ) {
                // Two-character operator such as "^=" or "~=".
                $operator = $next->getValue().'=';
                $stream->getNext();
            } else {
                throw SyntaxErrorException::unexpectedToken('operator', $next);
            }
        }

        $stream->skipWhitespace();
        $value = $stream->getNext();

        if ($value->isNumber()) {
            // if the value is a number, it's casted into a string
            $value = new Token(Token::TYPE_STRING, (string) $value->getValue(), $value->getPosition());
        }

        if (!($value->isIdentifier() || $value->isString())) {
            throw SyntaxErrorException::unexpectedToken('string or identifier', $value);
        }

        $stream->skipWhitespace();
        $next = $stream->getNext();

        if (!$next->isDelimiter([']'])) {
            throw SyntaxErrorException::unexpectedToken('"]"', $next);
        }

        return new Node\AttributeNode($selector, $namespace, $attribute, $operator, $value->getValue());
    }
}
385}