friendship ended with social-app. php is my new best friend
1<?php
2
3/**
4 * This file is part of the Nette Framework (https://nette.org)
5 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6 */
7
8declare(strict_types=1);
9
10namespace Nette\Utils;
11
12use Nette;
13use function array_merge, count, func_get_args, func_num_args, glob, implode, is_array, is_dir, iterator_to_array, preg_match, preg_quote, preg_replace, preg_split, rtrim, spl_object_id, sprintf, str_ends_with, str_starts_with, strnatcmp, strpbrk, strrpos, strtolower, strtr, substr, usort;
14use const GLOB_NOESCAPE, GLOB_NOSORT, GLOB_ONLYDIR;
15
16
/**
 * Finder allows searching through directory trees using iterator.
 *
 * Finder::findFiles('*.php')
 *     ->size('> 10kB')
 *     ->from('.')
 *     ->exclude('temp');
 *
 * Iterating the finder yields FileInfo objects keyed by their path name.
 *
 * @implements \IteratorAggregate<string, FileInfo>
 */
class Finder implements \IteratorAggregate
{
	use Nette\SmartObject;

	/** @var array<array{string, string}>  pairs [mask, mode], where mode is 'file' or 'dir' */
	private array $find = [];

	/** @var string[]  locations registered by in() and from() */
	private array $in = [];

	/** @var \Closure[]  every one must accept an entry for it to be yielded */
	private array $filters = [];

	/** @var \Closure[]  every one must accept a directory for it to be descended into */
	private array $descentFilters = [];

	/** @var array<string|self>  extra paths or finders whose results are yielded after the own ones */
	private array $appends = [];

	/** yield directory contents before the directory itself? */
	private bool $childFirst = false;

	/** @var ?callable  comparator applied to the entries of each single directory */
	private $sort;

	/** maximum depth of traversal; a negative value means no limit */
	private int $maxDepth = -1;

	/** silently skip directories that cannot be opened? */
	private bool $ignoreUnreadableDirs = true;


	/**
	 * Begins search for files and directories matching mask.
	 * @param  string|string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public static function find(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir')->addMask($masks, 'file');
	}


	/**
	 * Begins search for files matching mask.
	 * @param  string|string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public static function findFiles(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'file');
	}


	/**
	 * Begins search for directories matching mask.
	 * @param  string|string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public static function findDirectories(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir');
	}


	/**
	 * Finds files matching the specified masks.
	 * @param  string|string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public function files(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'file');
	}


	/**
	 * Finds directories matching the specified masks.
	 * @param  string|string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public function directories(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'dir');
	}


	/**
	 * Validates the masks and registers each of them for the given mode.
	 * @param  string[]  $masks
	 * @param  string  $mode  'file' or 'dir'
	 * @throws Nette\InvalidArgumentException  if a mask is empty, or a file mask ends with a slash
	 */
	private function addMask(array $masks, string $mode): static
	{
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			if ($mode === 'dir') {
				$mask = rtrim($mask, '/'); // a trailing slash is tolerated for directory masks only
			}
			if ($mask === '' || ($mode === 'file' && str_ends_with($mask, '/'))) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			if (str_starts_with($mask, '**/')) {
				// the leading recursive prefix is dropped; buildPattern() matches unanchored masks at any depth
				$mask = substr($mask, 3);
			}
			$this->find[] = [$mask, $mode];
		}
		return $this;
	}


	/**
	 * Searches in the given directories. Wildcards are allowed.
	 * @param  string|string[]  $paths
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	public function in(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '');
		return $this;
	}


	/**
	 * Searches recursively from the given directories. Wildcards are allowed.
	 * @param  string|string[]  $paths
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	public function from(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '/**');
		return $this;
	}


	/**
	 * Registers the search locations; $ext is appended to every normalized path.
	 * @param  string[]  $paths
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	private function addLocation(array $paths, string $ext): void
	{
		foreach ($paths as $path) {
			if ($path === '') {
				throw new Nette\InvalidArgumentException("Invalid directory '$path'");
			}
			$path = rtrim(FileSystem::unixSlashes($path), '/');
			$this->in[] = $path . $ext;
		}
	}


	/**
	 * Lists directory's contents before the directory itself. By default, this is disabled.
	 */
	public function childFirst(bool $state = true): static
	{
		$this->childFirst = $state;
		return $this;
	}


	/**
	 * Ignores unreadable directories. By default, this is enabled.
	 * When disabled, a directory that cannot be opened makes the iteration throw Nette\InvalidStateException.
	 */
	public function ignoreUnreadableDirs(bool $state = true): static
	{
		$this->ignoreUnreadableDirs = $state;
		return $this;
	}


	/**
	 * Set a compare function for sorting directory entries. The function will be called to sort entries from the same directory.
	 * @param  callable(FileInfo, FileInfo): int  $callback
	 */
	public function sortBy(callable $callback): static
	{
		$this->sort = $callback;
		return $this;
	}


	/**
	 * Sorts files in each directory naturally by name.
	 */
	public function sortByName(): static
	{
		// static: the comparator does not need $this, so do not bind the finder to it
		$this->sort = static fn(FileInfo $a, FileInfo $b): int => strnatcmp($a->getBasename(), $b->getBasename());
		return $this;
	}


	/**
	 * Appends the given paths to the results and returns this finder. When called without
	 * an argument, it creates a new finder whose results will be appended, and returns
	 * that new finder instead so it can be configured.
	 * @param  string|string[]|null  $paths
	 */
	public function append(string|array|null $paths = null): static
	{
		if ($paths === null) {
			return $this->appends[] = new static;
		}

		$this->appends = array_merge($this->appends, (array) $paths);
		return $this;
	}


	/********************* filtering ****************d*g**/


	/**
	 * Skips entries that matches the given masks relative to the ones defined with the in() or from() methods.
	 * @param  string|string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is invalid
	 */
	public function exclude(string|array $masks): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			if (!preg_match('~^/?(\*\*/)?(.+)(/\*\*|/\*|/|)$~D', $mask, $m)) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			$end = $m[3]; // optional suffix of the mask: a slash, slash-star, slash-double-star, or ''
			$re = $this->buildPattern($m[2]);

			// Returns true for entries to keep. With a suffix present, only directories can be excluded.
			$filter = static fn(FileInfo $file): bool => ($end && !$file->isDir())
				|| !preg_match($re, FileSystem::unixSlashes($file->getRelativePathname()));

			$this->descentFilter($filter);
			if ($end !== '/*') {
				// with the slash-star suffix the directory itself is still yielded, only not descended into
				$this->filter($filter);
			}
		}

		return $this;
	}


	/**
	 * Yields only entries which satisfy the given filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function filter(callable $callback): static
	{
		$this->filters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * It descends only to directories that match the specified filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function descentFilter(callable $callback): static
	{
		$this->descentFilters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * Sets the maximum depth of entries. Null removes the limit.
	 */
	public function limitDepth(?int $depth): static
	{
		$this->maxDepth = $depth ?? -1;
		return $this;
	}


	/**
	 * Restricts the search by size. $operator accepts "[operator] [size] [unit]" example: >=10kB
	 * Units are decimal (k = 1e3, M = 1e6, G = 1e9). Only files are tested, other entries always pass.
	 * @throws Nette\InvalidArgumentException  if the single-argument predicate cannot be parsed
	 */
	public function size(string $operator, ?int $size = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?((?:\d*\.)?\d+)\s*(K|M|G|)B?$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid size predicate format.');
			}

			[, $operator, $size, $unit] = $matches;
			$units = ['' => 1, 'k' => 1e3, 'm' => 1e6, 'g' => 1e9];
			$size *= $units[strtolower($unit)]; // numeric string times multiplier
			$operator = $operator ?: '='; // a missing operator means equality
		}

		return $this->filter(static fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getSize(), $operator, $size));
	}


	/**
	 * Restricts the search by modified time. $operator accepts "[operator] [date]" example: >1978-01-23
	 * Only files are tested, other entries always pass.
	 * @throws Nette\InvalidArgumentException  if the single-argument predicate cannot be parsed
	 */
	public function date(string $operator, string|int|\DateTimeInterface|null $date = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?(.+)$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid date predicate format.');
			}

			[, $operator, $date] = $matches;
			$operator = $operator ?: '='; // a missing operator means equality
		}

		$date = DateTime::from($date)->getTimestamp();
		return $this->filter(static fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getMTime(), $operator, $date));
	}


	/********************* iterator generator ****************d*g**/


	/**
	 * Returns an array with all found files and directories.
	 * @return list<FileInfo>
	 */
	public function collect(): array
	{
		return iterator_to_array($this->getIterator(), preserve_keys: false);
	}


	/**
	 * Yields the entries found by this finder, followed by everything registered via append().
	 * @return \Generator<string, FileInfo>
	 * @throws Nette\InvalidStateException  if a searched directory does not exist or the configuration is contradictory
	 */
	public function getIterator(): \Generator
	{
		$plan = $this->buildPlan();
		foreach ($plan as $dir => $searches) {
			yield from $this->traverseDir($dir, $searches);
		}

		foreach ($this->appends as $item) {
			if ($item instanceof self) {
				yield from $item->getIterator();
			} else {
				$item = FileSystem::platformSlashes($item);
				yield $item => new FileInfo($item);
			}
		}
	}


	/**
	 * Yields matching entries of one directory and recurses into its subdirectories for recursive searches.
	 * @param  array<object{pattern: string, mode: string, recursive: bool}>  $searches
	 * @param  string[]  $subdirs  names of the directories descended through so far
	 * @return \Generator<string, FileInfo>
	 * @throws Nette\InvalidStateException  if $dir is not a directory, or cannot be opened and unreadable directories are not ignored
	 */
	private function traverseDir(string $dir, array $searches, array $subdirs = []): \Generator
	{
		if ($this->maxDepth >= 0 && count($subdirs) > $this->maxDepth) {
			return;
		} elseif (!is_dir($dir)) {
			throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($dir, '/\\')));
		}

		try {
			$pathNames = new \FilesystemIterator($dir, \FilesystemIterator::FOLLOW_SYMLINKS | \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME | \FilesystemIterator::UNIX_PATHS);
		} catch (\UnexpectedValueException $e) {
			if ($this->ignoreUnreadableDirs) {
				return;
			} else {
				// keep the original exception attached so the underlying cause is not lost
				throw new Nette\InvalidStateException($e->getMessage(), previous: $e);
			}
		}

		$files = $this->convertToFiles($pathNames, implode('/', $subdirs), FileSystem::isAbsolute($dir));

		if ($this->sort) {
			$files = iterator_to_array($files);
			usort($files, $this->sort);
		}

		foreach ($files as $file) {
			$pathName = $file->getPathname();
			$cache = $subSearch = []; // $cache memoizes filter results for this single entry

			if ($file->isDir()) {
				// only recursive searches continue into a subdirectory, and only if all descent filters accept it
				foreach ($searches as $search) {
					if ($search->recursive && $this->proveFilters($this->descentFilters, $file, $cache)) {
						$subSearch[] = $search;
					}
				}
			}

			if ($this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}

			$relativePathname = FileSystem::unixSlashes($file->getRelativePathname());
			foreach ($searches as $search) {
				if (
					$file->{'is' . $search->mode}()
					&& preg_match($search->pattern, $relativePathname)
					&& $this->proveFilters($this->filters, $file, $cache)
				) {
					yield $pathName => $file;
					break; // an entry is yielded at most once even if several searches match it
				}
			}

			if (!$this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}
		}
	}


	/**
	 * Wraps path names into FileInfo objects carrying the given relative path.
	 * @param  iterable<string>  $pathNames
	 * @return \Generator<int, FileInfo>
	 */
	private function convertToFiles(iterable $pathNames, string $relativePath, bool $absolute): \Generator
	{
		foreach ($pathNames as $pathName) {
			if (!$absolute) {
				// strips a leading '/' or './' from path names found under a relative directory
				$pathName = preg_replace('~\.?/~A', '', $pathName);
			}
			$pathName = FileSystem::platformSlashes($pathName);
			yield new FileInfo($pathName, $relativePath);
		}
	}


	/**
	 * Checks that the file passes all the filters. Results are memoized in $cache by closure
	 * object id, so a closure present in several filter lists is evaluated once per cache.
	 * @param  \Closure[]  $filters
	 * @param  array<int, mixed>  $cache
	 */
	private function proveFilters(array $filters, FileInfo $file, array &$cache): bool
	{
		foreach ($filters as $filter) {
			$res = &$cache[spl_object_id($filter)];
			$res ??= $filter($file);
			if (!$res) {
				return false;
			}
		}

		return true;
	}


	/**
	 * Resolves masks and locations into a map: existing directory => searches to run in it.
	 * @return array<string, array<object{pattern: string, mode: string, recursive: bool}>>
	 * @throws Nette\InvalidStateException  if an absolute mask is combined with in()/from(), or no base directory is found
	 */
	private function buildPlan(): array
	{
		$plan = $dirCache = [];
		foreach ($this->find as [$mask, $mode]) {
			$splits = [];
			if (FileSystem::isAbsolute($mask)) {
				if ($this->in) {
					throw new Nette\InvalidStateException("You cannot combine the absolute path in the mask '$mask' and the directory to search '{$this->in[0]}'.");
				}
				$splits[] = self::splitRecursivePart($mask);
			} else {
				foreach ($this->in ?: ['.'] as $in) {
					$in = strtr($in, ['[' => '[[]', ']' => '[]]']); // in path, do not treat [ and ] as a pattern by glob()
					$splits[] = self::splitRecursivePart($in . '/' . $mask);
				}
			}

			foreach ($splits as [$base, $rest, $recursive]) {
				$base = $base === '' ? '.' : $base;
				// glob() is called only when the base contains wildcard characters; the result is cached per base
				$dirs = $dirCache[$base] ??= strpbrk($base, '*?[')
					? glob($base, GLOB_NOSORT | GLOB_ONLYDIR | GLOB_NOESCAPE)
					: [strtr($base, ['[[]' => '[', '[]]' => ']'])]; // unescape [ and ]

				if (!$dirs) {
					throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($base, '/\\')));
				}

				$search = (object) ['pattern' => $this->buildPattern($rest), 'mode' => $mode, 'recursive' => $recursive];
				foreach ($dirs as $dir) {
					$plan[$dir][] = $search;
				}
			}
		}

		return $plan;
	}


	/**
	 * Since glob() does not know ** wildcard, we divide the path into a part for glob and a part for manual traversal.
	 * Callers in this class always pass a path that contains a slash.
	 * @return array{string, string, bool}  [base for glob, remaining mask, whether a ** segment was found]
	 */
	private static function splitRecursivePart(string $path): array
	{
		$a = strrpos($path, '/');
		// only the directory portion is searched for a ** segment; the last path segment is appended untouched
		$parts = preg_split('~(?<=^|/)\*\*($|/)~', substr($path, 0, $a + 1), 2);
		return isset($parts[1])
			? [$parts[0], $parts[1] . substr($path, $a + 1), true]
			: [$parts[0], substr($path, $a + 1), false];
	}


	/**
	 * Converts wildcards to regular expression.
	 * A mask starting with './' is anchored to the beginning of the relative path,
	 * any other mask may start at the beginning or after any slash.
	 */
	private function buildPattern(string $mask): string
	{
		if ($mask === '*') {
			return '##'; // empty pattern, matches everything
		} elseif (str_starts_with($mask, './')) {
			$anchor = '^';
			$mask = substr($mask, 2);
		} else {
			$anchor = '(?:^|/)';
		}

		// quote everything first, then translate the quoted wildcard tokens back into regex constructs
		$pattern = strtr(
			preg_quote($mask, '#'),
			[
				'\*\*/' => '(.+/)?',
				'\*' => '[^/]*',
				'\?' => '[^/]',
				'\[\!' => '[^',
				'\[' => '[',
				'\]' => ']',
				'\-' => '-',
			],
		);
		return '#' . $anchor . $pattern . '$#D' . (Helpers::IsWindows ? 'i' : '');
	}
}