vendor/nette/utils/src/Utils/Strings.php at main · veryroundbird.house/smallbird-social

friendship ended with social-app. php is my new best friend
smallbird-social / vendor / nette / utils / src / Utils / Strings.php
at main 24 kB view raw
  1<?php
  2
  3/**
  4 * This file is part of the Nette Framework (https://nette.org)
  5 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
  6 */
  7
  8declare(strict_types=1);
  9
 10namespace Nette\Utils;
 11
 12use JetBrains\PhpStorm\Language;
 13use Nette;
 14use function array_keys, array_map, array_shift, array_values, bin2hex, class_exists, defined, extension_loaded, function_exists, htmlspecialchars, htmlspecialchars_decode, iconv, iconv_strlen, iconv_substr, implode, in_array, is_array, is_callable, is_int, is_object, is_string, key, max, mb_convert_case, mb_strlen, mb_strtolower, mb_strtoupper, mb_substr, pack, preg_last_error, preg_last_error_msg, preg_quote, preg_replace, str_contains, str_ends_with, str_repeat, str_replace, str_starts_with, strlen, strpos, strrev, strrpos, strtolower, strtoupper, strtr, substr, trim, unpack, utf8_decode;
 15use const ENT_IGNORE, ENT_NOQUOTES, ICONV_IMPL, MB_CASE_TITLE, PHP_EOL, PREG_OFFSET_CAPTURE, PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_SPLIT_DELIM_CAPTURE, PREG_SPLIT_NO_EMPTY, PREG_SPLIT_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL;
 16
 17
 18/**
 19 * String tools library.
 20 */
 21class Strings
 22{
 23	use Nette\StaticClass;
 24
 25	public const TrimCharacters = " \t\n\r\0\x0B\u{A0}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{200B}\u{2028}\u{3000}";
 26
 27	/** @deprecated use Strings::TrimCharacters */
 28	public const TRIM_CHARACTERS = self::TrimCharacters;
 29
 30
	/**
	 * Tells whether the string is left untouched by fixEncoding().
	 * @deprecated use Nette\Utils\Validators::isUnicode()
	 */
	public static function checkEncoding(string $s): bool
	{
		$fixed = self::fixEncoding($s);
		return $fixed === $s;
	}
 38
 39
	/**
	 * Strips invalid UTF-8 sequences from a string.
	 * The string is escaped with ENT_IGNORE and then unescaped again, so valid input is returned unchanged.
	 */
	public static function fixEncoding(string $s): string
	{
		// removes xD800-xDFFF, x110000 and higher
		$escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
		return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
	}
 48
 49
	/**
	 * Builds the UTF-8 character for a code point (number in range 0x0000..D7FF or 0xE000..10FFFF).
	 * @throws Nette\InvalidArgumentException if code point is not in valid range
	 * @throws Nette\NotSupportedException if the iconv extension is not loaded
	 */
	public static function chr(int $code): string
	{
		$isSurrogate = $code >= 0xD800 && $code <= 0xDFFF;
		if ($code < 0 || $isSurrogate || $code > 0x10FFFF) {
			throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
		}

		if (!extension_loaded('iconv')) {
			throw new Nette\NotSupportedException(__METHOD__ . '() requires ICONV extension that is not loaded.');
		}

		// the code point packed as a big-endian 32-bit integer is exactly its UTF-32BE encoding
		$utf32 = pack('N', $code);
		return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
	}
 64
 65
	/**
	 * Returns the code point of a UTF-8 character (number in range 0x0000..D7FF or 0xE000..10FFFF).
	 * @throws Nette\NotSupportedException if the iconv extension is not loaded
	 * @throws Nette\InvalidArgumentException if the conversion to UTF-32 yields nothing
	 */
	public static function ord(string $c): int
	{
		if (!extension_loaded('iconv')) {
			throw new Nette\NotSupportedException(__METHOD__ . '() requires ICONV extension that is not loaded.');
		}

		$utf32 = iconv('UTF-8', 'UTF-32BE//IGNORE', $c);
		if ($utf32) {
			// the first four bytes are the big-endian code point
			return unpack('N', $utf32)[1];
		}

		$shown = $c === '' ? '' : '\x' . strtoupper(bin2hex($c));
		throw new Nette\InvalidArgumentException('Invalid UTF-8 character "' . $shown . '".');
	}
 82
 83
	/**
	 * Thin wrapper around str_starts_with().
	 * @deprecated use str_starts_with()
	 */
	public static function startsWith(string $haystack, string $needle): bool
	{
		$hasPrefix = str_starts_with($haystack, $needle);
		return $hasPrefix;
	}
 91
 92
	/**
	 * Thin wrapper around str_ends_with().
	 * @deprecated use str_ends_with()
	 */
	public static function endsWith(string $haystack, string $needle): bool
	{
		$hasSuffix = str_ends_with($haystack, $needle);
		return $hasSuffix;
	}
100
101
102	/**
103	 * @deprecated use str_contains()
104	 */
105	public static function contains(string $haystack, string $needle): bool
106	{
107		return str_contains($haystack, $needle);
108	}
109
110
111	/**
112	 * Returns a part of UTF-8 string specified by starting position and length. If start is negative,
113	 * the returned string will start at the start'th character from the end of string.
114	 */
115	public static function substring(string $s, int $start, ?int $length = null): string
116	{
117		if (function_exists('mb_substr')) {
118			return mb_substr($s, $start, $length, 'UTF-8'); // MB is much faster
119		} elseif (!extension_loaded('iconv')) {
120			throw new Nette\NotSupportedException(__METHOD__ . '() requires extension ICONV or MBSTRING, neither is loaded.');
121		} elseif ($length === null) {
122			$length = self::length($s);
123		} elseif ($start < 0 && $length < 0) {
124			$start += self::length($s); // unifies iconv_substr behavior with mb_substr
125		}
126
127		return iconv_substr($s, $start, $length, 'UTF-8');
128	}
129
130
131	/**
132	 * Removes control characters, normalizes line breaks to `\n`, removes leading and trailing blank lines,
133	 * trims end spaces on lines, normalizes UTF-8 to the normal form of NFC.
134	 */
135	public static function normalize(string $s): string
136	{
137		// convert to compressed normal form (NFC)
138		if (class_exists('Normalizer', false) && ($n = \Normalizer::normalize($s, \Normalizer::FORM_C)) !== false) {
139			$s = $n;
140		}
141
142		$s = self::unixNewLines($s);
143
144		// remove control characters; leave \t + \n
145		$s = self::pcre('preg_replace', ['#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', '', $s]);
146
147		// right trim
148		$s = self::pcre('preg_replace', ['#[\t ]+$#m', '', $s]);
149
150		// leading and trailing blank lines
151		$s = trim($s, "\n");
152
153		return $s;
154	}
155
156
157	/** @deprecated use Strings::unixNewLines() */
158	public static function normalizeNewLines(string $s): string
159	{
160		return self::unixNewLines($s);
161	}
162
163
164	/**
165	 * Converts line endings to \n used on Unix-like systems.
166	 * Line endings are: \n, \r, \r\n, U+2028 line separator, U+2029 paragraph separator.
167	 */
168	public static function unixNewLines(string $s): string
169	{
170		return preg_replace("~\r\n?|\u{2028}|\u{2029}~", "\n", $s);
171	}
172
173
174	/**
175	 * Converts line endings to platform-specific, i.e. \r\n on Windows and \n elsewhere.
176	 * Line endings are: \n, \r, \r\n, U+2028 line separator, U+2029 paragraph separator.
177	 */
178	public static function platformNewLines(string $s): string
179	{
180		return preg_replace("~\r\n?|\n|\u{2028}|\u{2029}~", PHP_EOL, $s);
181	}
182
183
	/**
	 * Converts UTF-8 string to ASCII, ie removes diacritics etc.
	 *
	 * Works in stages: characters outside the accepted ranges are stripped by a `u`-mode regular expression,
	 * a handful of characters is replaced from fixed tables, and the rest is handed to the intl Transliterator
	 * and/or iconv, depending on which of them is available and on the iconv implementation reported by
	 * ICONV_IMPL. When neither can be used, every remaining non-ASCII byte is removed.
	 *
	 * The Transliterator instance (or `false` when the class is missing) is cached in a static variable;
	 * an E_USER_NOTICE recommending the intl extension is triggered only when that cache is first filled.
	 */
	public static function toAscii(string $s): string
	{
		// iconv implementation name with surrounding quotes stripped, or null when the constant is not defined
		$iconv = defined('ICONV_IMPL') ? trim(ICONV_IMPL, '"\'') : null;
		static $transliterator = null;
		if ($transliterator === null) {
			if (class_exists('Transliterator', false)) {
				$transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
			} else {
				trigger_error(__METHOD__ . "(): it is recommended to enable PHP extensions 'intl'.", E_USER_NOTICE);
				$transliterator = false; // remembered, so the check and the notice are not repeated
			}
		}

		// remove control characters and check UTF-8 validity
		$s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]);

		// transliteration (by Transliterator and iconv) is not optimal, replace some characters directly
		$s = strtr($s, ["\u{201E}" => '"', "\u{201C}" => '"', "\u{201D}" => '"', "\u{201A}" => "'", "\u{2018}" => "'", "\u{2019}" => "'", "\u{B0}" => '^', "\u{42F}" => 'Ya', "\u{44F}" => 'ya', "\u{42E}" => 'Yu', "\u{44E}" => 'yu', "\u{c4}" => 'Ae', "\u{d6}" => 'Oe', "\u{dc}" => 'Ue', "\u{1e9e}" => 'Ss', "\u{e4}" => 'ae', "\u{f6}" => 'oe', "\u{fc}" => 'ue', "\u{df}" => 'ss']); // „ “ ” ‚ ‘ ’ ° Я я Ю ю Ä Ö Ü ẞ ä ö ü ß
		// this second table is applied for every iconv implementation except libiconv
		if ($iconv !== 'libiconv') {
			$s = strtr($s, ["\u{AE}" => '(R)', "\u{A9}" => '(c)', "\u{2026}" => '...', "\u{AB}" => '<<', "\u{BB}" => '>>', "\u{A3}" => 'lb', "\u{A5}" => 'yen', "\u{B2}" => '^2', "\u{B3}" => '^3', "\u{B5}" => 'u', "\u{B9}" => '^1', "\u{BA}" => 'o', "\u{BF}" => '?', "\u{2CA}" => "'", "\u{2CD}" => '_', "\u{2DD}" => '"', "\u{1FEF}" => '', "\u{20AC}" => 'EUR', "\u{2122}" => 'TM', "\u{212E}" => 'e', "\u{2190}" => '<-', "\u{2191}" => '^', "\u{2192}" => '->', "\u{2193}" => 'V', "\u{2194}" => '<->']); // ® © … « » £ ¥ ² ³ µ ¹ º ¿ ˊ ˍ ˝ ` € ™ ℮ ← ↑ → ↓ ↔
		}

		if ($transliterator) {
			$s = $transliterator->transliterate($s);
			// use iconv because The transliterator leaves some characters out of ASCII, eg → ʾ
			if ($iconv === 'glibc') {
				$s = strtr($s, '?', "\x01"); // temporarily hide ? to distinguish them from the garbage that iconv creates
				$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
				$s = str_replace(['?', "\x01"], ['', '?'], $s); // remove garbage and restore ? characters
			} elseif ($iconv === 'libiconv') {
				$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
			} else { // null or 'unknown' (#216)
				$s = self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $s]); // remove non-ascii chars
			}
		} elseif ($iconv === 'glibc' || $iconv === 'libiconv') {
			// no Transliterator: iconv does all the work
			// temporarily hide these characters to distinguish them from the garbage that iconv creates
			$s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
			if ($iconv === 'glibc') {
				// glibc implementation is very limited. transliterate into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
				$s = iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
				// byte-for-byte map: the n-th byte of the first string is replaced by the n-th character of the second
				$s = strtr(
					$s,
					"\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
					'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.',
				);
				// whatever is still outside ASCII after the mapping is dropped
				$s = self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $s]);
			} else {
				$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
			}

			// remove garbage that iconv creates during transliteration (eg Ý -> Y')
			$s = str_replace(['`', "'", '"', '^', '~', '?'], '', $s);
			// restore temporarily hidden characters
			$s = strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
		} else {
			$s = self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $s]); // remove non-ascii chars
		}

		return $s;
	}
247
248
249	/**
250	 * Modifies the UTF-8 string to the form used in the URL, ie removes diacritics and replaces all characters
251	 * except letters of the English alphabet and numbers with a hyphens.
252	 */
253	public static function webalize(string $s, ?string $charlist = null, bool $lower = true): string
254	{
255		$s = self::toAscii($s);
256		if ($lower) {
257			$s = strtolower($s);
258		}
259
260		$s = self::pcre('preg_replace', ['#[^a-z0-9' . ($charlist !== null ? preg_quote($charlist, '#') : '') . ']+#i', '-', $s]);
261		$s = trim($s, '-');
262		return $s;
263	}
264
265
266	/**
267	 * Truncates a UTF-8 string to given maximal length, while trying not to split whole words. Only if the string is truncated,
268	 * an ellipsis (or something else set with third argument) is appended to the string.
269	 */
270	public static function truncate(string $s, int $maxLen, string $append = "\u{2026}"): string
271	{
272		if (self::length($s) > $maxLen) {
273			$maxLen -= self::length($append);
274			if ($maxLen < 1) {
275				return $append;
276
277			} elseif ($matches = self::match($s, '#^.{1,' . $maxLen . '}(?=[\s\x00-/:-@\[-`{-~])#us')) {
278				return $matches[0] . $append;
279
280			} else {
281				return self::substring($s, 0, $maxLen) . $append;
282			}
283		}
284
285		return $s;
286	}
287
288
289	/**
290	 * Indents a multiline text from the left. Second argument sets how many indentation chars should be used,
291	 * while the indent itself is the third argument (*tab* by default).
292	 */
293	public static function indent(string $s, int $level = 1, string $chars = "\t"): string
294	{
295		if ($level > 0) {
296			$s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
297		}
298
299		return $s;
300	}
301
302
303	/**
304	 * Converts all characters of UTF-8 string to lower case.
305	 */
306	public static function lower(string $s): string
307	{
308		return mb_strtolower($s, 'UTF-8');
309	}
310
311
312	/**
313	 * Converts the first character of a UTF-8 string to lower case and leaves the other characters unchanged.
314	 */
315	public static function firstLower(string $s): string
316	{
317		return self::lower(self::substring($s, 0, 1)) . self::substring($s, 1);
318	}
319
320
321	/**
322	 * Converts all characters of a UTF-8 string to upper case.
323	 */
324	public static function upper(string $s): string
325	{
326		return mb_strtoupper($s, 'UTF-8');
327	}
328
329
330	/**
331	 * Converts the first character of a UTF-8 string to upper case and leaves the other characters unchanged.
332	 */
333	public static function firstUpper(string $s): string
334	{
335		return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
336	}
337
338
339	/**
340	 * Converts the first character of every word of a UTF-8 string to upper case and the others to lower case.
341	 */
342	public static function capitalize(string $s): string
343	{
344		return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
345	}
346
347
348	/**
349	 * Compares two UTF-8 strings or their parts, without taking character case into account. If length is null, whole strings are compared,
350	 * if it is negative, the corresponding number of characters from the end of the strings is compared,
351	 * otherwise the appropriate number of characters from the beginning is compared.
352	 */
353	public static function compare(string $left, string $right, ?int $length = null): bool
354	{
355		if (class_exists('Normalizer', false)) {
356			$left = \Normalizer::normalize($left, \Normalizer::FORM_D); // form NFD is faster
357			$right = \Normalizer::normalize($right, \Normalizer::FORM_D); // form NFD is faster
358		}
359
360		if ($length < 0) {
361			$left = self::substring($left, $length, -$length);
362			$right = self::substring($right, $length, -$length);
363		} elseif ($length !== null) {
364			$left = self::substring($left, 0, $length);
365			$right = self::substring($right, 0, $length);
366		}
367
368		return self::lower($left) === self::lower($right);
369	}
370
371
372	/**
373	 * Finds the common prefix of strings or returns empty string if the prefix was not found.
374	 * @param  string[]  $strings
375	 */
376	public static function findPrefix(array $strings): string
377	{
378		$first = array_shift($strings);
379		for ($i = 0; $i < strlen($first); $i++) {
380			foreach ($strings as $s) {
381				if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
382					while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
383						$i--;
384					}
385
386					return substr($first, 0, $i);
387				}
388			}
389		}
390
391		return $first;
392	}
393
394
395	/**
396	 * Returns number of characters (not bytes) in UTF-8 string.
397	 * That is the number of Unicode code points which may differ from the number of graphemes.
398	 */
399	public static function length(string $s): int
400	{
401		return match (true) {
402			extension_loaded('mbstring') => mb_strlen($s, 'UTF-8'),
403			extension_loaded('iconv') => iconv_strlen($s, 'UTF-8'),
404			default => strlen(@utf8_decode($s)), // deprecated
405		};
406	}
407
408
409	/**
410	 * Removes all left and right side spaces (or the characters passed as second argument) from a UTF-8 encoded string.
411	 */
412	public static function trim(string $s, string $charlist = self::TrimCharacters): string
413	{
414		$charlist = preg_quote($charlist, '#');
415		return self::replace($s, '#^[' . $charlist . ']+|[' . $charlist . ']+$#Du', '');
416	}
417
418
419	/**
420	 * Pads a UTF-8 string to given length by prepending the $pad string to the beginning.
421	 * @param  non-empty-string  $pad
422	 */
423	public static function padLeft(string $s, int $length, string $pad = ' '): string
424	{
425		$length = max(0, $length - self::length($s));
426		$padLen = self::length($pad);
427		return str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen) . $s;
428	}
429
430
431	/**
432	 * Pads UTF-8 string to given length by appending the $pad string to the end.
433	 * @param  non-empty-string  $pad
434	 */
435	public static function padRight(string $s, int $length, string $pad = ' '): string
436	{
437		$length = max(0, $length - self::length($s));
438		$padLen = self::length($pad);
439		return $s . str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen);
440	}
441
442
443	/**
444	 * Reverses UTF-8 string.
445	 */
446	public static function reverse(string $s): string
447	{
448		if (!extension_loaded('iconv')) {
449			throw new Nette\NotSupportedException(__METHOD__ . '() requires ICONV extension that is not loaded.');
450		}
451
452		return iconv('UTF-32LE', 'UTF-8', strrev(iconv('UTF-8', 'UTF-32BE', $s)));
453	}
454
455
456	/**
457	 * Returns part of $haystack before $nth occurence of $needle or returns null if the needle was not found.
458	 * Negative value means searching from the end.
459	 */
460	public static function before(string $haystack, string $needle, int $nth = 1): ?string
461	{
462		$pos = self::pos($haystack, $needle, $nth);
463		return $pos === null
464			? null
465			: substr($haystack, 0, $pos);
466	}
467
468
469	/**
470	 * Returns part of $haystack after $nth occurence of $needle or returns null if the needle was not found.
471	 * Negative value means searching from the end.
472	 */
473	public static function after(string $haystack, string $needle, int $nth = 1): ?string
474	{
475		$pos = self::pos($haystack, $needle, $nth);
476		return $pos === null
477			? null
478			: substr($haystack, $pos + strlen($needle));
479	}
480
481
482	/**
483	 * Returns position in characters of $nth occurence of $needle in $haystack or null if the $needle was not found.
484	 * Negative value of `$nth` means searching from the end.
485	 */
486	public static function indexOf(string $haystack, string $needle, int $nth = 1): ?int
487	{
488		$pos = self::pos($haystack, $needle, $nth);
489		return $pos === null
490			? null
491			: self::length(substr($haystack, 0, $pos));
492	}
493
494
495	/**
496	 * Returns position in characters of $nth occurence of $needle in $haystack or null if the needle was not found.
497	 */
498	private static function pos(string $haystack, string $needle, int $nth = 1): ?int
499	{
500		if (!$nth) {
501			return null;
502		} elseif ($nth > 0) {
503			if ($needle === '') {
504				return 0;
505			}
506
507			$pos = 0;
508			while (($pos = strpos($haystack, $needle, $pos)) !== false && --$nth) {
509				$pos++;
510			}
511		} else {
512			$len = strlen($haystack);
513			if ($needle === '') {
514				return $len;
515			} elseif ($len === 0) {
516				return null;
517			}
518
519			$pos = $len - 1;
520			while (($pos = strrpos($haystack, $needle, $pos - $len)) !== false && ++$nth) {
521				$pos--;
522			}
523		}
524
525		return Helpers::falseToNull($pos);
526	}
527
528
529	/**
530	 * Divides the string into arrays according to the regular expression. Expressions in parentheses will be captured and returned as well.
531	 */
532	public static function split(
533		string $subject,
534		#[Language('RegExp')]
535		string $pattern,
536		bool|int $captureOffset = false,
537		bool $skipEmpty = false,
538		int $limit = -1,
539		bool $utf8 = false,
540	): array
541	{
542		$flags = is_int($captureOffset)  // back compatibility
543			? $captureOffset
544			: ($captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0) | ($skipEmpty ? PREG_SPLIT_NO_EMPTY : 0);
545
546		$pattern .= $utf8 ? 'u' : '';
547		$m = self::pcre('preg_split', [$pattern, $subject, $limit, $flags | PREG_SPLIT_DELIM_CAPTURE]);
548		return $utf8 && $captureOffset
549			? self::bytesToChars($subject, [$m])[0]
550			: $m;
551	}
552
553
554	/**
555	 * Searches the string for the part matching the regular expression and returns
556	 * an array with the found expression and individual subexpressions, or `null`.
557	 */
558	public static function match(
559		string $subject,
560		#[Language('RegExp')]
561		string $pattern,
562		bool|int $captureOffset = false,
563		int $offset = 0,
564		bool $unmatchedAsNull = false,
565		bool $utf8 = false,
566	): ?array
567	{
568		$flags = is_int($captureOffset) // back compatibility
569			? $captureOffset
570			: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
571
572		if ($utf8) {
573			$offset = strlen(self::substring($subject, 0, $offset));
574			$pattern .= 'u';
575		}
576
577		if ($offset > strlen($subject)) {
578			return null;
579		} elseif (!self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])) {
580			return null;
581		} elseif ($utf8 && $captureOffset) {
582			return self::bytesToChars($subject, [$m])[0];
583		} else {
584			return $m;
585		}
586	}
587
588
	/**
	 * Searches the string for all occurrences matching the regular expression and
	 * returns an array of arrays containing the found expression and each subexpression.
	 *
	 * With $utf8 the `u` modifier is appended to the pattern, $offset is taken in characters and, when
	 * $captureOffset is set, reported offsets are converted from bytes to characters.
	 * With $lazy a generator is returned that runs preg_match() repeatedly and yields one match at a time,
	 * keyed by a counter starting at 0; $patternOrder is not used in this mode.
	 * Otherwise preg_match_all() is called once; an integer $captureOffset is used directly as its flags
	 * (back compatibility) and PREG_SET_ORDER is added unless PREG_PATTERN_ORDER was requested.
	 * An empty array is returned when $offset lies behind the end of the subject.
	 * @return ($lazy is true ? \Generator<int, array> : array[])
	 */
	public static function matchAll(
		string $subject,
		#[Language('RegExp')]
		string $pattern,
		bool|int $captureOffset = false,
		int $offset = 0,
		bool $unmatchedAsNull = false,
		bool $patternOrder = false,
		bool $utf8 = false,
		bool $lazy = false,
	): array|\Generator
	{
		if ($utf8) {
			// translate the character offset into a byte offset
			$offset = strlen(self::substring($subject, 0, $offset));
			$pattern .= 'u';
		}

		if ($lazy) {
			// offsets are always captured here, they are needed to know where the next search starts
			$flags = PREG_OFFSET_CAPTURE | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
			return (function () use ($utf8, $captureOffset, $flags, $subject, $pattern, $offset) {
				$counter = 0;
				while (
					$offset <= strlen($subject) - ($counter ? 1 : 0)
					&& self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])
				) {
					// continue behind the current match; advance by at least one byte so an empty match cannot repeat forever
					$offset = $m[0][1] + max(1, strlen($m[0][0]));
					if (!$captureOffset) {
						// the caller did not ask for offsets: keep only the matched texts
						$m = array_map(fn($item) => $item[0], $m);
					} elseif ($utf8) {
						$m = self::bytesToChars($subject, [$m])[0];
					}
					yield $counter++ => $m;
				}
			})();
		}

		if ($offset > strlen($subject)) {
			return [];
		}

		$flags = is_int($captureOffset) // back compatibility
			? $captureOffset
			: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0);

		self::pcre('preg_match_all', [
			$pattern, $subject, &$m,
			($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
			$offset,
		]);
		return $utf8 && $captureOffset
			? self::bytesToChars($subject, $m)
			: $m;
	}
647
648
649	/**
650	 * Replaces all occurrences matching regular expression $pattern which can be string or array in the form `pattern => replacement`.
651	 */
652	public static function replace(
653		string $subject,
654		#[Language('RegExp')]
655		string|array $pattern,
656		string|callable $replacement = '',
657		int $limit = -1,
658		bool $captureOffset = false,
659		bool $unmatchedAsNull = false,
660		bool $utf8 = false,
661	): string
662	{
663		if (is_object($replacement) || is_array($replacement)) {
664			if (!is_callable($replacement, false, $textual)) {
665				throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
666			}
667
668			$flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
669			if ($utf8) {
670				$pattern .= 'u';
671				if ($captureOffset) {
672					$replacement = fn($m) => $replacement(self::bytesToChars($subject, [$m])[0]);
673				}
674			}
675
676			return self::pcre('preg_replace_callback', [$pattern, $replacement, $subject, $limit, 0, $flags]);
677
678		} elseif (is_array($pattern) && is_string(key($pattern))) {
679			$replacement = array_values($pattern);
680			$pattern = array_keys($pattern);
681		}
682
683		if ($utf8) {
684			$pattern = array_map(fn($item) => $item . 'u', (array) $pattern);
685		}
686
687		return self::pcre('preg_replace', [$pattern, $replacement, $subject, $limit]);
688	}
689
690
691	private static function bytesToChars(string $s, array $groups): array
692	{
693		$lastBytes = $lastChars = 0;
694		foreach ($groups as &$matches) {
695			foreach ($matches as &$match) {
696				if ($match[1] > $lastBytes) {
697					$lastChars += self::length(substr($s, $lastBytes, $match[1] - $lastBytes));
698				} elseif ($match[1] < $lastBytes) {
699					$lastChars -= self::length(substr($s, $match[1], $lastBytes - $match[1]));
700				}
701
702				$lastBytes = $match[1];
703				$match[1] = $lastChars;
704			}
705		}
706
707		return $groups;
708	}
709
710
711	/** @internal */
712	public static function pcre(string $func, array $args)
713	{
714		$res = Callback::invokeSafe($func, $args, function (string $message) use ($args): void {
715			// compile-time error, not detectable by preg_last_error
716			throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
717		});
718
719		if (($code = preg_last_error()) // run-time error, but preg_last_error & return code are liars
720			&& ($res === null || !in_array($func, ['preg_filter', 'preg_replace_callback', 'preg_replace'], true))
721		) {
722			throw new RegexpException(preg_last_error_msg()
723				. ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
724		}
725
726		return $res;
727	}
728}