from __future__ import annotations

import dataclasses as dc
import html
import itertools

from typing import cast, get_args, Iterable, Literal, Sequence

from markdown_it.token import Token

from .utils import Freezeable

# FragmentType is used to restrict structural include blocks.
FragmentType = Literal['preface', 'part', 'chapter', 'section', 'appendix']

# in the TOC all fragments are allowed, plus the all-encompassing book.
TocEntryType = Literal['book', 'preface', 'part', 'chapter', 'section', 'appendix', 'example', 'figure']

def is_include(token: Token) -> bool:
    return token.type == "fence" and token.info.startswith("{=include=} ")
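
# an include fence in the manual sources is a code fence whose info string starts
# with "{=include=} " (the prefix checked above), roughly like this (the block kind
# and file names below are illustrative):
#
#   ```{=include=} chapters
#   introduction.chapter.md
#   configuration.chapter.md
#   ```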

# toplevel file must contain only the title headings and includes, anything else
# would cause strange rendering.
def _check_book_structure(tokens: Sequence[Token]) -> None:
    # the title and subtitle headings occupy the first six tokens
    # (heading_open, inline, heading_close for each), so skip those.
    for token in tokens[6:]:
        if not is_include(token):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected structural include")

# much like books, parts may not contain headings other than their title heading.
# this is a limitation of the current renderers and TOC generators that do not handle
# this case well even though it is supported in docbook (and probably supportable
# anywhere else).
def _check_part_structure(tokens: Sequence[Token]) -> None:
    _check_fragment_structure(tokens)
    # the part title heading occupies the first three tokens, so skip those.
    for token in tokens[3:]:
        if token.type == 'heading_open':
            assert token.map
            raise RuntimeError(f"unexpected heading in line {token.map[0] + 1}")

# two include blocks must either be adjacent or separated by a heading, otherwise
# we cannot generate a correct TOC (since there'd be nothing to link to between
# the two includes).
def _check_fragment_structure(tokens: Sequence[Token]) -> None:
    for i, token in enumerate(tokens):
        if is_include(token) \
           and i + 1 < len(tokens) \
           and not (is_include(tokens[i + 1]) or tokens[i + 1].type == 'heading_open'):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected heading or structural include")

def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
    wanted = { 'h1': 'title' }
    wanted |= { 'h2': 'subtitle' } if kind == 'book' else {}
    for (i, (tag, role)) in enumerate(wanted.items()):
        # each heading contributes three tokens (heading_open, inline, heading_close),
        # so the i-th wanted heading must open at token 3 * i.
        if len(tokens) < 3 * (i + 1):
            raise RuntimeError(f"missing {role} ({tag}) heading")
        token = tokens[3 * i]
        if token.type != 'heading_open' or token.tag != tag:
            assert token.map
            raise RuntimeError(f"expected {role} ({tag}) heading in line {token.map[0] + 1}", token)
    for t in tokens[3 * len(wanted):]:
        if t.type != 'heading_open' or not (role := wanted.get(t.tag, '')):
            continue
        assert t.map
        raise RuntimeError(
            f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second in line {t.map[0] + 1}. "
            "please remove all such headings except the first or demote the subsequent headings.",
            t)

    last_heading_level = 0
    for token in tokens:
        if token.type != 'heading_open':
            continue

        # book subtitle headings do not need an id, only book title headings do.
        # every other heading needs one too. we need this to build a TOC and to
        # provide stable links if the manual changes shape.
        if 'id' not in token.attrs and (kind != 'book' or token.tag != 'h2'):
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} does not have an id")

        level = int(token.tag[1:]) # because tag = h1..h6
        if level > last_heading_level + 1:
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} skips one or more heading levels, "
                               "which is currently not allowed")
        last_heading_level = level

    if kind == 'book':
        _check_book_structure(tokens)
    elif kind == 'part':
        _check_part_structure(tokens)
    else:
        _check_fragment_structure(tokens)
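
# rough usage sketch (names are illustrative): the token stream comes from a
# markdown-it parse that, elsewhere in this package, puts heading ids into
# `token.attrs`; given such a stream, a caller would do something like
#
#   tokens = md.parse(chapter_source)      # `md` being a configured MarkdownIt instance
#   check_structure('chapter', tokens)     # raises RuntimeError on structural problems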

@dc.dataclass(frozen=True)
class XrefTarget:
    id: str
    """link label for `[](#local-references)`"""
    title_html: str
    toc_html: str | None
    """toc label"""
    title: str | None
    """text for `<title>` tags and `title="..."` attributes"""
    path: str
    """path to file that contains the anchor"""
    drop_fragment: bool = False
    """whether to drop the `#anchor` from links when expanding xrefs"""
    drop_target: bool = False
    """whether to drop the `path.html` from links when expanding xrefs.
    mostly useful for docbook compatibility"""

    def href(self) -> str:
        path = "" if self.drop_target else html.escape(self.path, True)
        return path if self.drop_fragment else f"{path}#{html.escape(self.id, True)}"
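
    # for instance (values made up): XrefTarget('opt-foo', 'foo', None, None, 'options.html').href()
    # returns 'options.html#opt-foo'; with drop_fragment=True it returns just
    # 'options.html', and with drop_target=True it returns '#opt-foo'.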

@dc.dataclass
class TocEntry(Freezeable):
    kind: TocEntryType
    target: XrefTarget
    parent: TocEntry | None = None
    prev: TocEntry | None = None
    next: TocEntry | None = None
    children: list[TocEntry] = dc.field(default_factory=list)
    starts_new_chunk: bool = False
    examples: list[TocEntry] = dc.field(default_factory=list)
    figures: list[TocEntry] = dc.field(default_factory=list)

    @property
    def root(self) -> TocEntry:
        return self.parent.root if self.parent else self

    @classmethod
    def of(cls, token: Token) -> TocEntry:
        entry = token.meta.get('TocEntry')
        if not isinstance(entry, TocEntry):
            raise RuntimeError('requested toc entry, none found', token)
        return entry

    @classmethod
    def collect_and_link(cls, xrefs: dict[str, XrefTarget], tokens: Sequence[Token]) -> TocEntry:
        entries, examples, figures = cls._collect_entries(xrefs, tokens, 'book')

        def flatten_with_parent(this: TocEntry, parent: TocEntry | None) -> Iterable[TocEntry]:
            this.parent = parent
            return itertools.chain([this], *[ flatten_with_parent(c, this) for c in this.children ])

        flat = list(flatten_with_parent(entries, None))
        prev = flat[0]
        prev.starts_new_chunk = True
        paths_seen = set([prev.target.path])
        for c in flat[1:]:
            if prev.target.path != c.target.path and c.target.path not in paths_seen:
                c.starts_new_chunk = True
                c.prev, prev.next = prev, c
                prev = c
            paths_seen.add(c.target.path)

        flat[0].examples = examples
        flat[0].figures = figures

        for c in flat:
            c.freeze()

        return entries
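
    # rough usage sketch (names are illustrative): given `xrefs` mapping heading,
    # example and figure ids to XrefTarget instances and `tokens` for the parsed
    # book source, `TocEntry.collect_and_link(xrefs, tokens)` returns the book
    # entry with parent/prev/next/children links, chunk starts, and the global
    # example and figure lists filled in.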

    @classmethod
    def _collect_entries(cls, xrefs: dict[str, XrefTarget], tokens: Sequence[Token],
                         kind: TocEntryType) -> tuple[TocEntry, list[TocEntry], list[TocEntry]]:
        # we assume that check_structure has been run recursively over the entire input.
        # list contains (tag, entry) pairs that will collapse to a single entry for
        # the full sequence.
        entries: list[tuple[str, TocEntry]] = []
        examples: list[TocEntry] = []
        figures: list[TocEntry] = []
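
        # headings are collected on a stack of (tag, entry) pairs: when a heading
        # arrives that is no deeper than the entry on top of the stack (hN tag
        # strings compare like their levels here), finished entries are popped and
        # attached to their parents' children lists before the new entry is pushed.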
        for token in tokens:
            if token.type.startswith('included_') and (included := token.meta.get('included')):
                fragment_type_str = token.type[9:].removesuffix('s')
                assert fragment_type_str in get_args(TocEntryType)
                fragment_type = cast(TocEntryType, fragment_type_str)
                for fragment, _path in included:
                    subentries, subexamples, subfigures = cls._collect_entries(xrefs, fragment, fragment_type)
                    entries[-1][1].children.append(subentries)
                    examples += subexamples
                    figures += subfigures
            elif token.type == 'heading_open' and (id := cast(str, token.attrs.get('id', ''))):
                while len(entries) > 1 and entries[-1][0] >= token.tag:
                    entries[-2][1].children.append(entries.pop()[1])
                entries.append((token.tag,
                                TocEntry(kind if token.tag == 'h1' else 'section', xrefs[id])))
                token.meta['TocEntry'] = entries[-1][1]
            elif token.type == 'example_open' and (id := cast(str, token.attrs.get('id', ''))):
                examples.append(TocEntry('example', xrefs[id]))
            elif token.type == 'figure_open' and (id := cast(str, token.attrs.get('id', ''))):
                figures.append(TocEntry('figure', xrefs[id]))

        while len(entries) > 1:
            entries[-2][1].children.append(entries.pop()[1])
        return (entries[0][1], examples, figures)