commit 6b2889e87aaf68240d6d78c00b136f570c7319df · pyrox.dev/nixpkgs

+228

maintainers/scripts/sha-to-sri.py

···

       1
       +
       #!/usr/bin/env nix-shell

     

       2
       +
       #! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ rich structlog ])"

     

       3
       +
       

     

       4
       +
       from abc import ABC, abstractclassmethod, abstractmethod

     

       5
       +
       from contextlib import contextmanager

     

       6
       +
       from pathlib import Path

     

       7
       +
       from structlog.contextvars import bound_contextvars as log_context

     

       8
       +
       from typing import ClassVar, List, Tuple

     

       9
       +
       

     

       10
       +
       import hashlib, re, structlog

     

       11
       +
       

     

       12
       +
       

     

       13
       +
       # Module-wide structlog logger, requested under the name "sha-to-SRI".
       logger = structlog.getLogger("sha-to-SRI")

     

       14
       +
       

     

       15
       +
       

     

       16
       +
       class _ClassProperty:
           """Read-only attribute computed from the owning class.

           Works both on the class itself and on its instances.  It replaces
           stacking `@classmethod` on `@property`, which was deprecated in
           Python 3.11 and stopped working in Python 3.13.
           """

           def __init__(self, getter):
               self._getter = getter

           def __get__(self, instance, owner=None):
               return self._getter(type(instance) if owner is None else owner)


       class Encoding(ABC):
           """A textual encoding of the digest of one particular hash.

           Subclasses set `alphabet` (the accepted characters, written so that it
           can be placed inside a regex character class) and implement `length`
           and `decode`.
           """

           alphabet: ClassVar[str]

           @_ClassProperty
           def name(cls) -> str:
               """Lower-cased class name, e.g. `nix32` for a class named `Nix32`."""
               return cls.__name__.lower()

           def toSRI(self, s: str) -> str:
               """Convert the encoded digest `s` into an SRI string.

               The result has the form `<hash name>-<base64 of the digest>`.

               Raises `ValueError` if `s` does not decode to exactly `self.n` bytes.
               """
               digest = self.decode(s)
               # Deliberately not an `assert`: this validates input, so it must
               # survive `python -O` and be catchable as a `ValueError`.
               if len(digest) != self.n:
                   raise ValueError(
                       f"Expected {self.n} bytes for {self.hashName}, "
                       f"got {len(digest)}: '{s}'"
                   )

               from base64 import b64encode
               return f"{self.hashName}-{b64encode(digest).decode()}"

           @classmethod
           def all(cls, h) -> 'List[Encoding]':
               """Instantiate every direct subclass of `cls` for the hash `h`."""
               return [ c(h) for c in cls.__subclasses__() ]

           def __init__(self, h):
               """Remember the digest size (`n`) and name (`hashName`) of `h`.

               `h` only needs `digest_size` and `name` attributes.
               """
               self.n = h.digest_size
               self.hashName = h.name

           @property
           @abstractmethod
           def length(self) -> int:
               """Number of characters of an encoded digest of `self.n` bytes."""
               ...

           @property
           def regex(self) -> str:
               """Regex source matching exactly `length` characters of `alphabet`."""
               return f"[{self.alphabet}]{{{self.length}}}"

           @abstractmethod
           def decode(self, s: str) -> bytes:
               """Decode the encoded digest `s` into raw bytes."""
               ...

     

       51
       +
       

     

       52
       +
       

     

       53
       +
       class Nix32(Encoding):
           """Nix's base-32 digest encoding.

           Characters are read from the end of the string; each one contributes
           5 bits, least-significant bits first, to the decoded digest.
           """

           alphabet = "0123456789abcdfghijklmnpqrsvwxyz"
           # Maps each character of the alphabet back to its 5-bit value.
           inverted = { c: i for i, c in enumerate(alphabet) }

           @property
           def length(self):
               """Number of characters needed for `8 * n` bits, i.e. ceil(8n / 5).

               Written as `(8n - 1) // 5 + 1` so that digests whose bit length is
               a multiple of 5 (e.g. 20 bytes) are not over-counted by one.
               """
               return (8 * self.n - 1) // 5 + 1

           def decode(self, s: str):
               """Decode the nix32 string `s` into `self.n` raw bytes.

               Raises `ValueError` if `s` has the wrong length, contains a
               character outside `alphabet`, or encodes more than `8 * n` bits.
               """
               if len(s) != self.length:
                   raise ValueError(f"Invalid nix32 hash: '{s}'")

               out = bytearray(self.n)

               for n, c in enumerate(reversed(s)):
                   digit = self.inverted.get(c)
                   if digit is None:
                       raise ValueError(f"Invalid nix32 hash: '{s}'")

                   # The digit's 5 bits start at bit `j` of byte `i`...
                   i, j = divmod(5 * n, 8)
                   out[i] |= (digit << j) & 0xff
                   # ...and whatever does not fit spills into the next byte.
                   rem = digit >> (8 - j)
                   if rem == 0:
                       continue
                   # The last valid index is `n - 1`, so a spill is only legal
                   # while `i + 1` is still inside the buffer.
                   if i + 1 < self.n:
                       out[i + 1] |= rem
                   else:
                       raise ValueError(f"Invalid nix32 hash: '{s}'")

               return bytes(out)

     

       78
       +
       

     

       79
       +
       class Hex(Encoding):
           """Base-16 encoding: two hexadecimal characters per digest byte."""

           alphabet = "0-9A-Fa-f"

           @property
           def length(self):
               """Number of hexadecimal characters for a digest of `n` bytes."""
               return self.n * 2

           def decode(self, s: str):
               """Turn the hexadecimal string `s` back into raw bytes."""
               import binascii
               return binascii.unhexlify(s)

     

       88
       +
       

     

       89
       +
       class Base64(Encoding):
           """Standard base-64 encoding, with `=` padding."""

           alphabet = "A-Za-z0-9+/"

           @property
           def format(self) -> Tuple[int, int]:
               """Number of characters in data and padding."""
               whole, leftover = divmod(self.n, 3)
               # A trailing group of `leftover` bytes takes `leftover + 1` characters.
               if leftover == 0:
                   partial = 0
               else:
                   partial = leftover + 1
               data = 4 * whole + partial
               padding = (3 - leftover) % 3
               return data, padding

           @property
           def length(self):
               """Total number of characters, padding included."""
               data, padding = self.format
               return data + padding

           @property
           def regex(self):
               """Regex source: the data characters followed by the exact padding."""
               data, padding = self.format
               return f"[{self.alphabet}]{{{data}}}={{{padding}}}"

           def decode(self, s):
               """Decode `s` strictly (characters outside the alphabet are rejected)."""
               import base64
               return base64.b64decode(s, validate = True)

     

       107
       +
       

     

       108
       +
       

     

       109
       +
       # One hash object per supported algorithm (lazy: consumed once, just below).
       _HASHES = (hashlib.new(algorithm) for algorithm in ('SHA-256', 'SHA-512'))

       # Hash name -> one instance of every known encoding for that hash.
       ENCODINGS = { h.name: Encoding.all(h) for h in _HASHES }

       # Hash name -> regex source with one named group `<hash>_<encoding>` per
       # encoding; the base64 form may carry an optional `<hash>-` prefix.
       RE = {
           h: "|".join([
               (
                   f"({h}-)?(?P<{h}_{e.name}>{e.regex})"
                   if e.name == 'base64'
                   else f"(?P<{h}_{e.name}>{e.regex})"
               )
               for e in encodings
           ])
           for h, encodings in ENCODINGS.items()
       }

       # Matches `<hash> = "<digest>";` with either quote style; the closing quote
       # has to be the same character as the opening one.
       _DEF_RE = re.compile("|".join([
           f"(?P<{h}>{h} = (?P<{h}_quote>['\"])({pattern})(?P={h}_quote);)"
           for h, pattern in RE.items()
       ]))

     

       127
       +
       

     

       128
       +
       

     

       129
       +
       def defToSRI(s: str) -> str:
           """Rewrite every hash definition found in `s` as `hash = "<SRI>";`.

           A definition whose conversion raises `ValueError` is logged and left
           as it was.
           """
           def convert(found: re.Match[str]) -> str:
               """Build the SRI definition for one match of `_DEF_RE`."""
               # Find which hash's alternative produced the match.
               for h, encodings in ENCODINGS.items():
                   if found.group(h) is not None:
                       break
               else:
                   raise ValueError("Match with no hash")

               # Then which encoding of that hash captured the digest.
               for e in encodings:
                   encoded = found.group(f"{h}_{e.name}")
                   if encoded is not None:
                       return f'hash = "{e.toSRI(encoded)}";'

               raise ValueError(f"Match with '{h}' but no subgroup")

           def substitute(found: re.Match[str]) -> str:
               """Convert one match, falling back to the matched text on failure."""
               try:
                   return convert(found)
               except ValueError as exn:
                   logger.error(
                       "Skipping",
                       exc_info = exn,
                   )
                   return found.group()

           return _DEF_RE.sub(substitute, s)

     

       152
       +
       

     

       153
       +
       

     

       154
       +
       @contextmanager
       def atomicFileUpdate(target: Path):
           '''Atomically replace the contents of a file.

           Guarantees that no temporary files are left behind, and `target` is either
           left untouched, or overwritten with new content if no exception was raised.

           Yields a pair `(original, new)` of open files.
           `original` is the pre-existing file at `target`, open for reading;
           `new` is an empty, temporary file in the same folder, open for writing.

           Upon exiting the context, the files are closed; if no exception was
           raised, `new` (atomically) replaces the `target`, otherwise it is deleted.
           The permission bits of `target` are carried over to its replacement.
           '''
           # That's mostly copied from noto-emoji.py, should DRY it out
           import os
           from shutil import copymode
           from tempfile import mkstemp

           fd, _p = mkstemp(
               dir = target.parent,
               prefix = target.name,
           )
           tmpPath = Path(_p)

           try:
               # Wrap the descriptor returned by mkstemp instead of reopening the
               # path, so that it gets closed rather than leaked.
               with os.fdopen(fd, 'w') as new:
                   with target.open() as original:
                       yield (original, new)

               # mkstemp creates the file readable and writable by the owner only;
               # restore the mode of the file being replaced.
               copymode(target, tmpPath)
               tmpPath.replace(target)

           except BaseException:
               # BaseException, so that an interrupt (e.g. Ctrl-C) does not leave
               # the temporary file behind either.
               tmpPath.unlink(missing_ok = True)
               raise

     

       186
       +
       

     

       187
       +
       

     

       188
       +
       def fileToSRI(p: Path):
           """Convert the hash definitions of the file at `p`, one line at a time.

           The 0-based index of the line being converted is bound to the logging
           context as `line`.
           """
           with atomicFileUpdate(p) as (source, rewritten):
               for index, text in enumerate(source):
                   with log_context(line=index):
                       converted = defToSRI(text)
                       rewritten.write(converted)

     

       193
       +
       

     

       194
       +
       

     

       195
       +
       # Markers that, when found on the first non-blank line of a file, flag the
       # file as autogenerated.
       _SKIP_RE = re.compile(
           "(generated by)|(do not edit)",
           re.IGNORECASE
       )

       if __name__ == "__main__":
           from sys import argv
           logger.info("Starting!")

           # Every command-line argument is the path of a file to convert in place.
           for arg in argv[1:]:
               p = Path(arg)
               with log_context(path=str(p)):
                   try:
                       if p.name == "yarn.nix" or "generated" in p.name:
                           logger.warning("File looks autogenerated, skipping!")
                           continue

                       # First non-blank line, or "" when there is none.  Without
                       # the default, an empty file would reuse the line left over
                       # from the previous file (or hit an unbound name).
                       with p.open() as f:
                           firstLine = next(
                               (text for text in f if text.strip()),
                               "",
                           )

                       if _SKIP_RE.search(firstLine):
                           logger.warning("File looks autogenerated, skipping!")
                           continue

                       fileToSRI(p)
                   except Exception as exn:
                       # Top-level boundary: report the failure, move on to the
                       # next file.
                       logger.error(
                           "Unhandled exception, skipping file!",
                           exc_info = exn,
                       )
                   else:
                       logger.info("Finished processing file")

-149

maintainers/scripts/sha256-to-SRI.py

···

       1
       -
       #!/usr/bin/env nix-shell

     

       2
       -
       #! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ rich structlog ])"

     

       3
       -
       

     

       4
       -
       from contextlib import contextmanager

     

       5
       -
       from pathlib import Path

     

       6
       -
       from structlog.contextvars import bound_contextvars as log_context

     

       7
       -
       

     

       8
       -
       import re, structlog

     

       9
       -
       

     

       10
       -
       

     

       11
       -
       # Module-wide structlog logger, requested under the name "sha256-to-SRI".
       logger = structlog.getLogger("sha256-to-SRI")

     

       12
       -
       

     

       13
       -
       

     

       14
       -
       # The 32 characters of Nix's base-32 alphabet, in value order.
       nix32alphabet = "0123456789abcdfghijklmnpqrsvwxyz"
       # Reverse lookup: character -> 5-bit value.
       nix32inverted = dict(zip(nix32alphabet, range(len(nix32alphabet))))

       def nix32decode(s: str) -> bytes:
           """Decode a 52-character nix32 string into the 32 bytes of a sha256 digest.

           Characters are consumed from the end of `s`; each contributes 5 bits.
           Raises `ValueError` when the string encodes more than 256 bits.
           """
           # only support sha256 hashes for now
           assert len(s) == 52
           decoded = [0] * 32
           # TODO: Do better than a list of byte-sized ints

           for position, char in enumerate(s[::-1]):
               value = nix32inverted[char]
               byteIndex, bitOffset = divmod(5 * position, 8)
               decoded[byteIndex] |= (value << bitOffset) & 0xff
               # Bits that do not fit in the current byte go to the next one.
               overflow = value >> (8 - bitOffset)
               if overflow == 0:
                   continue
               if byteIndex >= 31:
                   raise ValueError(f"Invalid nix32 hash: '{s}'")
               decoded[byteIndex + 1] = overflow

           return bytes(decoded)

     

       36
       -
       

     

       37
       -
       

     

       38
       -
       def toSRI(digest: bytes) -> str:
           """Format a 32-byte sha256 digest as an SRI string (`sha256-<base64>`)."""
           import base64
           assert len(digest) == 32
           encoded = base64.b64encode(digest).decode()
           return "sha256-" + encoded

     

       42
       -
       

     

       43
       -
       

     

       44
       -
       # Regex source for a sha256 digest in each supported encoding.
       RE = {
           'nix32': f"[{nix32alphabet}]{{52}}",
           'hex': "[0-9A-Fa-f]{64}",
           'base64': "[A-Za-z0-9+/]{43}=",
       }
       # Any of the above, each in its own named group; only the base64 form may
       # be preceded by an optional `sha256-` prefix.
       RE['sha256'] = '|'.join([
           ('(sha256-)?' if name == 'base64' else '') + f"(?P<{name}>{r})"
           for name, r in RE.items()
       ])

     

       54
       -
       

     

       55
       -
       def sha256toSRI(m: re.Match) -> str:
           """Produce the equivalent SRI string for any match of RE['sha256']"""
           # Exactly one of the named groups is expected to have captured.
           if (encoded := m['nix32']) is not None:
               digest = nix32decode(encoded)
           elif (encoded := m['hex']) is not None:
               from binascii import unhexlify
               digest = unhexlify(encoded)
           elif (encoded := m['base64']) is not None:
               from base64 import b64decode
               digest = b64decode(encoded)
           else:
               raise ValueError("Got a match where none of the groups captured")

           return toSRI(digest)

     

       67
       -
       

     

       68
       -
       

     

       69
       -
       # A backreference makes the closing quote match the opening one, instead
       # of spelling out one variant of the pattern per quote style.
       _def_re = re.compile(
           f"sha256 = (?P<quote>[\"'])({RE['sha256']})(?P=quote);"
       )

     

       75
       -
       

     

       76
       -
       def defToSRI(s: str) -> str:
           """Rewrite every `sha256 = "...";` definition in `s` as `hash = "<SRI>";`.

           A definition whose conversion raises `ValueError` is logged and left
           as it was.
           """
           def substitute(found: re.Match[str]) -> str:
               try:
                   sri = sha256toSRI(found)
               except ValueError as exn:
                   logger.error(
                       "Skipping",
                       exc_info = exn,
                   )
                   # Give the matched text back unchanged.
                   return found.group()

               return f'hash = "{sri}";'

           return _def_re.sub(substitute, s)

     

       92
       -
       

     

       93
       -
       

     

       94
       -
       @contextmanager
       def atomicFileUpdate(target: Path):
           '''Atomically replace the contents of a file.

           Guarantees that no temporary files are left behind, and `target` is either
           left untouched, or overwritten with new content if no exception was raised.

           Yields a pair `(original, new)` of open files.
           `original` is the pre-existing file at `target`, open for reading;
           `new` is an empty, temporary file in the same folder, open for writing.

           Upon exiting the context, the files are closed; if no exception was
           raised, `new` (atomically) replaces the `target`, otherwise it is deleted.
           The permission bits of `target` are carried over to its replacement.
           '''
           # That's mostly copied from noto-emoji.py, should DRY it out
           import os
           from shutil import copymode
           from tempfile import mkstemp

           fd, _p = mkstemp(
               dir = target.parent,
               prefix = target.name,
           )
           tmpPath = Path(_p)

           try:
               # Wrap the descriptor returned by mkstemp instead of reopening the
               # path, so that it gets closed rather than leaked.
               with os.fdopen(fd, 'w') as new:
                   with target.open() as original:
                       yield (original, new)

               # mkstemp creates the file readable and writable by the owner only;
               # restore the mode of the file being replaced.
               copymode(target, tmpPath)
               tmpPath.replace(target)

           except BaseException:
               # BaseException, so that an interrupt (e.g. Ctrl-C) does not leave
               # the temporary file behind either.
               tmpPath.unlink(missing_ok = True)
               raise

     

       126
       -
       

     

       127
       -
       

     

       128
       -
       def fileToSRI(p: Path):
           """Convert the hash definitions of the file at `p`, one line at a time.

           The 0-based index of the line being converted is bound to the logging
           context as `line`.
           """
           with atomicFileUpdate(p) as (source, rewritten):
               for index, text in enumerate(source):
                   with log_context(line=index):
                       converted = defToSRI(text)
                       rewritten.write(converted)

     

       133
       -
       

     

       134
       -
       

     

       135
       -
       if __name__ == "__main__":
           from sys import argv, stderr

           # Every command-line argument is the path of a file to convert in place.
           for p in map(Path, argv[1:]):
               with log_context(path=str(p)):
                   try:
                       fileToSRI(p)
                   except Exception as exn:
                       # Report the failure and carry on with the next file.
                       logger.error(
                           "Unhandled exception, skipping file!",
                           exc_info = exn,
                       )
                   else:
                       logger.info("Finished processing file")