1
# Convert a list of strings to a regex that matches everything but those strings
# ... and it has to be a POSIX regex; no negative lookahead :(
# This is a workaround for erofs supporting only an exclude regex, not an include list
5
6import sys
7import re
8from collections import defaultdict
9
# We can configure this script to match in different ways if we need to.
# The regex got too long for the argument list, so we had to truncate the
# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up
# some garbage like .lock files, but only if the sandbox doesn't hide those.
# Even then it should be harmless.
15
# Each constant below is the regex tail emitted once a listed string has been
# matched in full; it decides which continuations of that string are rejected.

# Produce the negation of ^a$: any non-empty continuation is rejected.
MATCH_EXACTLY = ".+"
# Produce the negation of ^a: nothing that starts with a listed string may be
# rejected, so the tail has to be a regex that can never match. (An empty
# "epsilon" regex would do the opposite and match everything.) No supported
# spelling for "never matches" was found, so "//X" stands in for it --
# presumably on the assumption that "//X" never occurs in the filtered paths.
MATCH_STRING_PREFIX = "//X"
# Produce the negation of ^a/?: a continuation is rejected unless it starts
# with "/", i.e. unless it is a subpath of the listed string.
MATCH_SUBPATHS = "[^/].*$"

# Active mode: leave exactly one of these assignments uncommented.
# match_end = MATCH_SUBPATHS
match_end = MATCH_STRING_PREFIX
# match_end = MATCH_EXACTLY
26
def chars_to_inverted_class(letters):
    """Return a POSIX bracket expression matching any one char not in *letters*.

    Args:
        letters: Non-empty iterable of single characters (a string, a list,
            or a dict whose keys are the characters).

    Returns:
        A string of the form ``[^...]``. Characters that are position
        sensitive inside a bracket expression are placed where they are
        taken literally.

    Raises:
        ValueError: If *letters* is empty; ``[^]`` is not a valid expression.
    """
    # Drop duplicates but keep first-seen order. A repeated "]" would
    # otherwise survive the special-casing below and close the expression
    # early.
    pending = list(dict.fromkeys(letters))
    if not pending:
        raise ValueError("letters must contain at least one character")

    def take(char):
        """Remove *char* from the pending characters; return it, or ""."""
        if char in pending:
            pending.remove(char)
            return char
        return ""

    # "]" is only literal when it comes first (right after the "^").
    leading = take("]")
    # "-" is only literal when it comes last. "[" is parked just before it so
    # that it can never be followed by ".", ":" or "=", which would open a
    # collating symbol, character class or equivalence class.
    trailing = take("[") + take("-")

    return "[^" + leading + "".join(pending) + trailing + "]"
49
def _escape_for_posix_ere(char):
    """Return *char* as a literal usable outside brackets in a POSIX ERE."""
    # Only these characters are special outside a bracket expression.
    # Backslash-escaping any other character (as re.escape does for "-", "#",
    # " ", ...) is undefined by POSIX and only makes the output longer.
    if char in ".[\\()*+?{|^$":
        return "\\" + char
    return char


# So far this has only been exercised for filtering store paths; treat other
# kinds of input with some care.
def strings_to_inverted_regex(strings):
    """Build an unanchored-at-the-end regex for "none of *strings*".

    The result is a parenthesised alternation that matches, character by
    character, any input that departs from every listed string: either the
    next character starts none of them, or it starts some of them and the
    remainder departs from all of those (handled recursively). Once a listed
    string has been consumed in full, the module-level ``match_end`` decides
    which continuations are still rejected.

    With the three provided ``match_end`` values every alternative consumes
    at least one more character, so a strict prefix of a listed string is
    never matched by the result.

    Args:
        strings: Iterable of strings. An empty string means "a listed string
            ends at this position".

    Returns:
        The regex fragment as a string. The caller adds the leading "^".
    """
    tails_by_first_char = defaultdict(list)
    ends_here = False
    for item in strings:
        if item:
            tails_by_first_char[item[0]].append(item[1:])
        else:
            ends_here = True

    first_chars = list(tails_by_first_char)

    if ends_here and tails_by_first_char:
        # A listed string ends here while longer listed strings continue.
        # Whatever that shorter string keeps must not be rejected on behalf
        # of the longer ones.
        if match_end == MATCH_STRING_PREFIX:
            # Everything that starts with the shorter string is kept.
            tails_by_first_char.clear()
        elif match_end == MATCH_SUBPATHS:
            # Every "/..." continuation is kept: do not descend into "/" and
            # do not treat "/" as a wrong next character either.
            tails_by_first_char.pop("/", None)
            first_chars = list(tails_by_first_char) + ["/"]
        # With MATCH_EXACTLY only the shorter string itself is kept, and no
        # alternative below can match it because each one needs at least one
        # further character.

    if not tails_by_first_char:
        # Nothing left to descend into: only the end-of-string rule applies.
        return "(" + match_end + ")"

    # Match anything that starts with the wrong character ...
    alternatives = [chars_to_inverted_class(first_chars)]
    # ... or that starts with a right character but then goes wrong.
    for char, tails in tails_by_first_char.items():
        alternatives.append(
            "(" + _escape_for_posix_ere(char)
            + strings_to_inverted_regex(tails) + ")"
        )

    return "(" + "|".join(alternatives) + ")"
75
if __name__ == "__main__":
    # One wanted string per stdin line. Surrounding whitespace is stripped
    # and lines that are blank after stripping are ignored.
    wanted = [entry for entry in map(str.strip, sys.stdin) if entry]

    # The generated fragment describes a path from its first character on,
    # so anchor it at the start of the string.
    print("^" + strings_to_inverted_regex(wanted))
83
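# Test:
# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
# With match_end = MATCH_SUBPATHS this should print ab, az, foo and their
# subpaths. With match_end = MATCH_STRING_PREFIX it also prints abc, abcde and
# abb, because anything that merely starts with a listed string is kept.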