friendship ended with social-app. php is my new best friend
at main 14 kB view raw
<?php

/**
 * This file is part of the Nette Framework (https://nette.org)
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
 */

declare(strict_types=1);

namespace Nette\Utils;

use Nette;
use function array_merge, count, func_get_args, func_num_args, glob, implode, is_array, is_dir, iterator_to_array, preg_match, preg_quote, preg_replace, preg_split, rtrim, spl_object_id, sprintf, str_ends_with, str_starts_with, strnatcmp, strpbrk, strrpos, strtolower, strtr, substr, usort;
use const GLOB_NOESCAPE, GLOB_NOSORT, GLOB_ONLYDIR;


/**
 * Finder allows searching through directory trees using iterator.
 *
 * Finder::findFiles('*.php')
 *     ->size('> 10kB')
 *     ->from('.')
 *     ->exclude('temp');
 *
 * @implements \IteratorAggregate<string, FileInfo>
 */
class Finder implements \IteratorAggregate
{
	use Nette\SmartObject;

	/**
	 * Registered search masks as [mask, mode] pairs; mode is 'file' or 'dir'.
	 * @var array<array{string, string}>
	 */
	private array $find = [];

	/**
	 * Locations to search; entries added by from() carry a trailing '/**'.
	 * @var string[]
	 */
	private array $in = [];

	/**
	 * Filters an entry must pass to be yielded.
	 * @var \Closure[]
	 */
	private array $filters = [];

	/**
	 * Filters a directory must pass to be descended into.
	 * @var \Closure[]
	 */
	private array $descentFilters = [];

	/**
	 * Extra paths or nested finders whose results are yielded after this finder's own.
	 * @var array<string|self>
	 */
	private array $appends = [];

	/** Whether the contents of a directory are yielded before the directory itself. */
	private bool $childFirst = false;

	/**
	 * Comparison callback used to sort the entries of each directory, if set.
	 * @var ?callable
	 */
	private $sort;

	/** Maximum depth of traversal; a negative value means unlimited. */
	private int $maxDepth = -1;

	/** Whether directories that cannot be opened are silently skipped instead of throwing. */
	private bool $ignoreUnreadableDirs = true;


	/**
	 * Begins search for files and directories matching mask.
	 * Masks may be given as one array or as several string arguments.
	 */
	public static function find(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir')->addMask($masks, 'file');
	}


	/**
	 * Begins search for files matching mask.
	 * Masks may be given as one array or as several string arguments.
	 */
	public static function findFiles(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'file');
	}


	/**
	 * Begins search for directories matching mask.
	 * Masks may be given as one array or as several string arguments.
	 */
	public static function findDirectories(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir');
	}


	/**
	 * Finds files matching the specified masks.
	 */
	public function files(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'file');
	}


	/**
	 * Finds directories matching the specified masks.
	 */
	public function directories(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'dir');
	}


	/**
	 * Validates the masks and registers each of them for the given mode ('file' or 'dir').
	 * @throws Nette\InvalidArgumentException  if a mask is empty, or a file mask ends with a slash
	 */
	private function addMask(array $masks, string $mode): static
	{
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			if ($mode === 'dir') {
				// a trailing slash on a directory mask carries no extra meaning
				$mask = rtrim($mask, '/');
			}
			if ($mask === '' || ($mode === 'file' && str_ends_with($mask, '/'))) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			if (str_starts_with($mask, '**/')) {
				// the leading '**/' is dropped; the pattern built later is not anchored to the start anyway
				$mask = substr($mask, 3);
			}
			$this->find[] = [$mask, $mode];
		}
		return $this;
	}


	/**
	 * Searches in the given directories. Wildcards are allowed.
	 * Paths may be given as one array or as several string arguments.
	 */
	public function in(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '');
		return $this;
	}


	/**
	 * Searches recursively from the given directories. Wildcards are allowed.
	 * Paths may be given as one array or as several string arguments.
	 */
	public function from(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '/**');
		return $this;
	}


	/**
	 * Registers search locations; $ext is appended to every path ('' for in(), '/**' for from()).
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	private function addLocation(array $paths, string $ext): void
	{
		foreach ($paths as $path) {
			if ($path === '') {
				throw new Nette\InvalidArgumentException("Invalid directory '$path'");
			}
			$path = rtrim(FileSystem::unixSlashes($path), '/');
			$this->in[] = $path . $ext;
		}
	}


	/**
	 * Lists directory's contents before the directory itself. By default, this is disabled.
	 */
	public function childFirst(bool $state = true): static
	{
		$this->childFirst = $state;
		return $this;
	}


	/**
	 * Ignores unreadable directories. By default, this is enabled.
	 */
	public function ignoreUnreadableDirs(bool $state = true): static
	{
		$this->ignoreUnreadableDirs = $state;
		return $this;
	}


	/**
	 * Set a compare function for sorting directory entries. The function will be called to sort entries from the same directory.
	 * @param  callable(FileInfo, FileInfo): int  $callback
	 */
	public function sortBy(callable $callback): static
	{
		$this->sort = $callback;
		return $this;
	}


	/**
	 * Sorts files in each directory naturally by name.
	 */
	public function sortByName(): static
	{
		$this->sort = fn(FileInfo $a, FileInfo $b): int => strnatcmp($a->getBasename(), $b->getBasename());
		return $this;
	}


	/**
	 * Adds the specified paths to the results, or, when called without an argument,
	 * creates a new finder, appends it and returns it.
	 * Note that in the latter case the NEW finder is returned, not $this.
	 */
	public function append(string|array|null $paths = null): static
	{
		if ($paths === null) {
			return $this->appends[] = new static;
		}

		$this->appends = array_merge($this->appends, (array) $paths);
		return $this;
	}


	/********************* filtering ****************d*g**/


	/**
	 * Skips entries that matches the given masks relative to the ones defined with the in() or from() methods.
	 *
	 * The optional suffix of a mask selects what is affected:
	 * - no suffix: matching files and directories are skipped and matching directories are not descended into
	 * - trailing '/' or '/**': only directories are affected; they are skipped and not descended into
	 * - trailing '/*': matching directories are not descended into, but are not skipped themselves
	 *
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public function exclude(string|array $masks): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			// The middle group must be lazy: a greedy (.+) would swallow the trailing '/', '/*' or '/**'
			// and leave the suffix group always empty, making the $end handling below unreachable.
			if (!preg_match('~^/?(\*\*/)?(.+?)(/\*\*|/\*|/|)$~D', $mask, $m)) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			$end = $m[3];
			$re = $this->buildPattern($m[2]);
			// with a suffix present the mask targets directories only, so non-directories always pass
			$filter = fn(FileInfo $file): bool => ($end && !$file->isDir())
				|| !preg_match($re, FileSystem::unixSlashes($file->getRelativePathname()));

			$this->descentFilter($filter);
			if ($end !== '/*') {
				$this->filter($filter);
			}
		}

		return $this;
	}


	/**
	 * Yields only entries which satisfy the given filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function filter(callable $callback): static
	{
		$this->filters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * It descends only to directories that match the specified filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function descentFilter(callable $callback): static
	{
		$this->descentFilters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * Sets the maximum depth of entries. Passing null removes the limit.
	 */
	public function limitDepth(?int $depth): static
	{
		$this->maxDepth = $depth ?? -1;
		return $this;
	}


	/**
	 * Restricts the search by size. $operator accepts "[operator] [size] [unit]" example: >=10kB
	 *
	 * When called with a single argument, it is parsed as a predicate; the operator defaults to '='
	 * and the units K, M and G (case-insensitive) are decimal multiples (1e3, 1e6, 1e9).
	 * Entries that are not files always pass this filter.
	 *
	 * @throws Nette\InvalidArgumentException  if the predicate cannot be parsed
	 */
	public function size(string $operator, ?int $size = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?((?:\d*\.)?\d+)\s*(K|M|G|)B?$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid size predicate format.');
			}

			[, $operator, $size, $unit] = $matches;
			$units = ['' => 1, 'k' => 1e3, 'm' => 1e6, 'g' => 1e9];
			$size *= $units[strtolower($unit)];
			$operator = $operator ?: '=';
		}

		return $this->filter(fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getSize(), $operator, $size));
	}


	/**
	 * Restricts the search by modified time. $operator accepts "[operator] [date]" example: >1978-01-23
	 *
	 * When called with a single argument, it is parsed as a predicate; the operator defaults to '='.
	 * The date is converted to a timestamp via DateTime::from().
	 * Entries that are not files always pass this filter.
	 *
	 * @throws Nette\InvalidArgumentException  if the predicate cannot be parsed
	 */
	public function date(string $operator, string|int|\DateTimeInterface|null $date = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?(.+)$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid date predicate format.');
			}

			[, $operator, $date] = $matches;
			$operator = $operator ?: '=';
		}

		$date = DateTime::from($date)->getTimestamp();
		return $this->filter(fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getMTime(), $operator, $date));
	}


	/********************* iterator generator ****************d*g**/


	/**
	 * Returns an array with all found files and directories.
	 * @return list<FileInfo>
	 */
	public function collect(): array
	{
		return iterator_to_array($this->getIterator(), preserve_keys: false);
	}


	/**
	 * Yields the found entries keyed by path name, followed by the results of everything
	 * registered through append().
	 * @return \Generator<string, FileInfo>
	 */
	public function getIterator(): \Generator
	{
		$plan = $this->buildPlan();
		foreach ($plan as $dir => $searches) {
			yield from $this->traverseDir($dir, $searches);
		}

		foreach ($this->appends as $item) {
			if ($item instanceof self) {
				yield from $item->getIterator();
			} else {
				$item = FileSystem::platformSlashes($item);
				yield $item => new FileInfo($item);
			}
		}
	}


	/**
	 * Yields the matching entries of one directory and recurses into its subdirectories
	 * for the searches that are recursive.
	 *
	 * @param  array<object{pattern: string, mode: string, recursive: bool}>  $searches
	 * @param  string[]  $subdirs  names of the directories descended through so far
	 * @return \Generator<string, FileInfo>
	 * @throws Nette\InvalidStateException  if the directory does not exist, or cannot be opened
	 *     while ignoring of unreadable directories is disabled
	 */
	private function traverseDir(string $dir, array $searches, array $subdirs = []): \Generator
	{
		if ($this->maxDepth >= 0 && count($subdirs) > $this->maxDepth) {
			return;
		} elseif (!is_dir($dir)) {
			throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($dir, '/\\')));
		}

		try {
			$pathNames = new \FilesystemIterator($dir, \FilesystemIterator::FOLLOW_SYMLINKS | \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME | \FilesystemIterator::UNIX_PATHS);
		} catch (\UnexpectedValueException $e) {
			if ($this->ignoreUnreadableDirs) {
				return;
			} else {
				// chain the original exception so that its trace is not lost
				throw new Nette\InvalidStateException($e->getMessage(), previous: $e);
			}
		}

		$files = $this->convertToFiles($pathNames, implode('/', $subdirs), FileSystem::isAbsolute($dir));

		if ($this->sort) {
			// sorting needs the whole directory listing in memory
			$files = iterator_to_array($files);
			usort($files, $this->sort);
		}

		foreach ($files as $file) {
			$pathName = $file->getPathname();
			$cache = $subSearch = []; // filter results are cached per entry

			if ($file->isDir()) {
				foreach ($searches as $search) {
					if ($search->recursive && $this->proveFilters($this->descentFilters, $file, $cache)) {
						$subSearch[] = $search;
					}
				}
			}

			if ($this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}

			$relativePathname = FileSystem::unixSlashes($file->getRelativePathname());
			foreach ($searches as $search) {
				if (
					$file->{'is' . $search->mode}()
					&& preg_match($search->pattern, $relativePathname)
					&& $this->proveFilters($this->filters, $file, $cache)
				) {
					yield $pathName => $file;
					break; // yield every entry at most once, even if several searches match
				}
			}

			if (!$this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}
		}
	}


	/**
	 * Wraps path names into FileInfo objects carrying the given relative path.
	 * @return \Generator<int, FileInfo>
	 */
	private function convertToFiles(iterable $pathNames, string $relativePath, bool $absolute): \Generator
	{
		foreach ($pathNames as $pathName) {
			if (!$absolute) {
				// strip a leading './' (or '/') from path names of relative searches
				$pathName = preg_replace('~\.?/~A', '', $pathName);
			}
			$pathName = FileSystem::platformSlashes($pathName);
			yield new FileInfo($pathName, $relativePath);
		}
	}


	/**
	 * Checks whether the file passes all the given filters.
	 * Results are stored in $cache under the filter's object id, so a filter registered in both
	 * lists is not called again for the same file when the same cache is passed.
	 * @param  \Closure[]  $filters
	 */
	private function proveFilters(array $filters, FileInfo $file, array &$cache): bool
	{
		foreach ($filters as $filter) {
			$res = &$cache[spl_object_id($filter)];
			$res ??= $filter($file);
			if (!$res) {
				return false;
			}
		}

		return true;
	}


	/**
	 * Resolves masks and locations into a map of existing base directories to the searches
	 * that should be performed inside each of them.
	 *
	 * @return array<string, array<object{pattern: string, mode: string, recursive: bool}>>
	 * @throws Nette\InvalidStateException  if an absolute mask is combined with in()/from(),
	 *     or a base directory does not exist
	 */
	private function buildPlan(): array
	{
		$plan = $dirCache = [];
		foreach ($this->find as [$mask, $mode]) {
			$splits = [];
			if (FileSystem::isAbsolute($mask)) {
				if ($this->in) {
					throw new Nette\InvalidStateException("You cannot combine the absolute path in the mask '$mask' and the directory to search '{$this->in[0]}'.");
				}
				$splits[] = self::splitRecursivePart($mask);
			} else {
				foreach ($this->in ?: ['.'] as $in) {
					$in = strtr($in, ['[' => '[[]', ']' => '[]]']); // in path, do not treat [ and ] as a pattern by glob()
					$splits[] = self::splitRecursivePart($in . '/' . $mask);
				}
			}

			foreach ($splits as [$base, $rest, $recursive]) {
				$base = $base === '' ? '.' : $base;
				// glob() is needed only when the base contains wildcards; results are cached per base
				$dirs = $dirCache[$base] ??= strpbrk($base, '*?[')
					? glob($base, GLOB_NOSORT | GLOB_ONLYDIR | GLOB_NOESCAPE)
					: [strtr($base, ['[[]' => '[', '[]]' => ']'])]; // unescape [ and ]

				if (!$dirs) {
					throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($base, '/\\')));
				}

				$search = (object) ['pattern' => $this->buildPattern($rest), 'mode' => $mode, 'recursive' => $recursive];
				foreach ($dirs as $dir) {
					$plan[$dir][] = $search;
				}
			}
		}

		return $plan;
	}


	/**
	 * Since glob() does not know ** wildcard, we divide the path into a part for glob and a part for manual traversal.
	 * @return array{string, string, bool}  [base for glob(), rest matched manually, whether '**' was present]
	 */
	private static function splitRecursivePart(string $path): array
	{
		$a = strrpos($path, '/');
		// look for the first '**' segment in the directory part only (everything up to the last slash)
		$parts = preg_split('~(?<=^|/)\*\*($|/)~', substr($path, 0, $a + 1), 2);
		return isset($parts[1])
			? [$parts[0], $parts[1] . substr($path, $a + 1), true]
			: [$parts[0], substr($path, $a + 1), false];
	}


	/**
	 * Converts wildcards to regular expression.
	 * A mask starting with './' is anchored to the start of the relative path; otherwise it may
	 * match starting at any path segment. The pattern is case-insensitive when Helpers::IsWindows is set.
	 */
	private function buildPattern(string $mask): string
	{
		if ($mask === '*') {
			return '##'; // empty pattern, matches everything
		} elseif (str_starts_with($mask, './')) {
			$anchor = '^';
			$mask = substr($mask, 2);
		} else {
			$anchor = '(?:^|/)';
		}

		// quote the mask first, then turn the quoted wildcards back into regex constructs
		$pattern = strtr(
			preg_quote($mask, '#'),
			[
				'\*\*/' => '(.+/)?',
				'\*' => '[^/]*',
				'\?' => '[^/]',
				'\[\!' => '[^',
				'\[' => '[',
				'\]' => ']',
				'\-' => '-',
			],
		);
		return '#' . $anchor . $pattern . '$#D' . (Helpers::IsWindows ? 'i' : '');
	}
}