···
2
+
# Convert a list of strings to a regex that matches everything but those strings
3
+
# ... and it had to be a POSIX regex; no negative lookahead :(
4
+
# This is a workaround for erofs supporting only exclude regex, not an include list
8
+
from collections import defaultdict
10
+
# We can configure this script to match in different ways if we need to.
11
+
# The regex got too long for the argument list, so we had to truncate the
12
+
# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some
13
+
# garbage like .lock files, but only if the sandbox doesn't hide those. Even
14
+
# then it should be harmless.
16
+
# Produce the negation of ^a$
17
+
MATCH_EXACTLY = ".+"
18
+
# Produce the negation of ^a
19
+
MATCH_STRING_PREFIX = "//X" # //X should be epsilon regex instead. Not supported??
20
+
# Produce the negation of ^a/?
21
+
MATCH_SUBPATHS = "[^/].*$"
23
+
# match_end = MATCH_SUBPATHS
24
+
match_end = MATCH_STRING_PREFIX
25
+
# match_end = MATCH_EXACTLY
27
+
def chars_to_inverted_class(letters):
28
+
assert len(letters) > 0
29
+
letters = list(letters)
42
+
s += "".join(letters)
50
+
# There's probably at least one bug in here, but it seems to work well enough
51
+
# for filtering store paths.
52
+
def strings_to_inverted_regex(strings):
55
+
# Match anything that starts with the wrong character
57
+
chars = defaultdict(list)
59
+
for item in strings:
61
+
chars[item[0]].append(item[1:])
66
+
s += chars_to_inverted_class(chars)
68
+
# Now match anything that starts with the right char, but then goes wrong
70
+
for char, sub in chars.items():
71
+
s += "|(" + re.escape(char) + strings_to_inverted_regex(sub) + ")"
76
+
if __name__ == "__main__":
78
+
for line in sys.stdin:
79
+
if line.strip() != "":
80
+
stdin_lines.append(line.strip())
82
+
print("^" + strings_to_inverted_regex(stdin_lines))
85
+
# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
86
+
# should print ab, az, foo and their subpaths