# at 23.11-pre 2.5 kB view raw
# Convert a list of strings to a regex that matches everything but those strings
# ... and it had to be a POSIX regex; no negative lookahead :(
# This is a workaround for erofs supporting only exclude regex, not an include list

import sys
import re
from collections import defaultdict

# We can configure this script to match in different ways if we need to.
# The regex got too long for the argument list, so we had to truncate the
# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some
# garbage like .lock files, but only if the sandbox doesn't hide those. Even
# then it should be harmless.

# Each MATCH_* value is the regex emitted at the point where an included string
# has been fully consumed; it describes which remainders are still excluded.

# Produce the negation of ^a$
MATCH_EXACTLY = ".+"
# Produce the negation of ^a
MATCH_STRING_PREFIX = "//X" # //X should be epsilon regex instead. Not supported??
# Produce the negation of ^a/?
MATCH_SUBPATHS = "[^/].*$"

# match_end = MATCH_SUBPATHS
match_end = MATCH_STRING_PREFIX
# match_end = MATCH_EXACTLY


def chars_to_inverted_class(letters):
    """Return a POSIX bracket expression matching any char NOT in `letters`.

    `letters` is any iterable of single characters (a string, a list, or a
    dict whose keys are the characters). Duplicates are ignored.

    Characters that are positional inside a bracket expression are placed
    where they are taken literally:
      * "]" goes first, right after "[^";
      * "-" goes last, right before the closing "]";
      * "[" goes just before that, so it can only be followed by "-" or the
        closing "]" and never forms "[.", "[:" or "[=".

    Raises ValueError if `letters` is empty ("[^]" is not a valid class).
    """
    # dict.fromkeys drops duplicates while keeping first-seen order; a
    # repeated "]" or "-" left in the middle would corrupt the class.
    remaining = list(dict.fromkeys(letters))
    if not remaining:
        raise ValueError("cannot build an inverted class from no characters")

    def take(char):
        # Pull `char` out of the ordinary characters if it is present.
        if char in remaining:
            remaining.remove(char)
            return char
        return ""

    leading = take("]")
    trailing = take("-")
    open_bracket = take("[")

    return "[^" + leading + "".join(remaining) + open_bracket + trailing + "]"


# There's probably at least one bug in here, but it seems to work well enough
# for filtering store paths.
# Characters that are special in a POSIX extended regular expression when they
# appear outside a bracket expression. Escaping anything else (as re.escape
# does for "-", "#", " ", ...) is undefined in POSIX and only makes the
# already-too-long regex longer.
_ERE_SPECIAL_CHARS = ".[\\()*+?{|^$"


def _escape_ere_char(char):
    """Return the single character `char` escaped for literal use in a POSIX ERE."""
    if char in _ERE_SPECIAL_CHARS:
        return "\\" + char
    return char


def strings_to_inverted_regex(strings):
    """Build an (unanchored) POSIX ERE matching everything but `strings`.

    The result is a parenthesised alternation: either the next character is
    one that no included string continues with, or it is one that some
    included string continues with and the rest goes wrong further on
    (built recursively on the remaining suffixes). The caller is expected to
    prepend "^".

    The empty string in `strings` means "an included string ends exactly
    here". What may still be excluded after that point depends on the
    module-level `match_end` mode:
      * MATCH_STRING_PREFIX: nothing; every continuation is included, so the
        longer strings sharing this prefix are irrelevant.
      * MATCH_SUBPATHS: continuations starting with "/" are included, so "/"
        joins the set of acceptable next characters and is not descended into.
      * MATCH_EXACTLY: only the longer included strings stay included.

    If no string continues from here (including when `strings` is empty),
    the result is just "(" + match_end + ")".
    """
    # Group the suffixes by their first character.
    chars = defaultdict(list)
    ends_here = False
    for item in strings:
        if item == "":
            ends_here = True
        else:
            chars[item[0]].append(item[1:])

    if ends_here:
        if match_end == MATCH_STRING_PREFIX:
            # A whole-prefix include covers every longer string below it.
            chars.clear()
        elif match_end == MATCH_SUBPATHS:
            # Everything under "<include>/" is already included.
            chars.pop("/", None)

    if not chars:
        return "(" + match_end + ")"

    # Match anything that starts with the wrong character
    wrong_first = list(chars)
    if ends_here and match_end == MATCH_SUBPATHS:
        wrong_first.append("/")
    alternatives = [chars_to_inverted_class(wrong_first)]

    # Now match anything that starts with the right char, but then goes wrong
    for char, sub in chars.items():
        alternatives.append(
            "(" + _escape_ere_char(char) + strings_to_inverted_regex(sub) + ")"
        )

    return "(" + "|".join(alternatives) + ")"


def main():
    """Read include strings from stdin (one per line) and print the exclude regex."""
    stripped = (line.strip() for line in sys.stdin)
    # Blank lines carry no include string.
    includes = [entry for entry in stripped if entry != ""]
    print("^" + strings_to_inverted_regex(includes))


if __name__ == "__main__":
    main()

# Test:
# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
# should print ab, az, foo and their subpaths