pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/manual.py at 23.11-beta · pyrox.dev/nixpkgs

pyrox.dev / nixpkgs
lol
nixpkgs / pkgs / tools / nix / nixos-render-docs / src / nixos_render_docs / manual.py
at 23.11-beta 36 kB view raw
  1import argparse
  2import hashlib
  3import html
  4import json
  5import re
  6import xml.sax.saxutils as xml
  7
  8from abc import abstractmethod
  9from collections.abc import Mapping, Sequence
 10from pathlib import Path
 11from typing import Any, cast, ClassVar, Generic, get_args, NamedTuple
 12
 13from markdown_it.token import Token
 14
 15from . import md, options
 16from .docbook import DocBookRenderer, Heading, make_xml_id
 17from .html import HTMLRenderer, UnresolvedXrefError
 18from .manual_structure import check_structure, FragmentType, is_include, TocEntry, TocEntryType, XrefTarget
 19from .md import Converter, Renderer
 20
 21class BaseConverter(Converter[md.TR], Generic[md.TR]):
 22    # per-converter configuration for ns:arg=value arguments to include blocks, following
 23    # the include type. html converters need something like this to support chunking, or
 24    # another external method like the chunktocs docbook uses (but block options seem like
 25    # a much nicer of doing this).
 26    INCLUDE_ARGS_NS: ClassVar[str]
 27    INCLUDE_FRAGMENT_ALLOWED_ARGS: ClassVar[set[str]] = set()
 28    INCLUDE_OPTIONS_ALLOWED_ARGS: ClassVar[set[str]] = set()
 29
 30    _base_paths: list[Path]
 31    _current_type: list[TocEntryType]
 32
 33    def convert(self, infile: Path, outfile: Path) -> None:
 34        self._base_paths = [ infile ]
 35        self._current_type = ['book']
 36        try:
 37            tokens = self._parse(infile.read_text())
 38            self._postprocess(infile, outfile, tokens)
 39            converted = self._renderer.render(tokens)
 40            outfile.write_text(converted)
 41        except Exception as e:
 42            raise RuntimeError(f"failed to render manual {infile}") from e
 43
 44    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
 45        pass
 46
 47    def _parse(self, src: str) -> list[Token]:
 48        tokens = super()._parse(src)
 49        check_structure(self._current_type[-1], tokens)
 50        for token in tokens:
 51            if not is_include(token):
 52                continue
 53            directive = token.info[12:].split()
 54            if not directive:
 55                continue
 56            args = { k: v for k, _sep, v in map(lambda s: s.partition('='), directive[1:]) }
 57            typ = directive[0]
 58            if typ == 'options':
 59                token.type = 'included_options'
 60                self._process_include_args(token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
 61                self._parse_options(token, args)
 62            else:
 63                fragment_type = typ.removesuffix('s')
 64                if fragment_type not in get_args(FragmentType):
 65                    raise RuntimeError(f"unsupported structural include type '{typ}'")
 66                self._current_type.append(cast(FragmentType, fragment_type))
 67                token.type = 'included_' + typ
 68                self._process_include_args(token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
 69                self._parse_included_blocks(token, args)
 70                self._current_type.pop()
 71        return tokens
 72
 73    def _process_include_args(self, token: Token, args: dict[str, str], allowed: set[str]) -> None:
 74        ns = self.INCLUDE_ARGS_NS + ":"
 75        args = { k[len(ns):]: v for k, v in args.items() if k.startswith(ns) }
 76        if unknown := set(args.keys()) - allowed:
 77            assert token.map
 78            raise RuntimeError(f"unrecognized include argument in line {token.map[0] + 1}", unknown)
 79        token.meta['include-args'] = args
 80
 81    def _parse_included_blocks(self, token: Token, block_args: dict[str, str]) -> None:
 82        assert token.map
 83        included = token.meta['included'] = []
 84        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
 85            line = line.strip()
 86            path = self._base_paths[-1].parent / line
 87            if path in self._base_paths:
 88                raise RuntimeError(f"circular include found in line {lnum}")
 89            try:
 90                self._base_paths.append(path)
 91                with open(path, 'r') as f:
 92                    tokens = self._parse(f.read())
 93                    included.append((tokens, path))
 94                self._base_paths.pop()
 95            except Exception as e:
 96                raise RuntimeError(f"processing included file {path} from line {lnum}") from e
 97
 98    def _parse_options(self, token: Token, block_args: dict[str, str]) -> None:
 99        assert token.map
100
101        items = {}
102        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
103            if len(args := line.split(":", 1)) != 2:
104                raise RuntimeError(f"options directive with no argument in line {lnum}")
105            (k, v) = (args[0].strip(), args[1].strip())
106            if k in items:
107                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
108            items[k] = v
109        try:
110            id_prefix = items.pop('id-prefix')
111            varlist_id = items.pop('list-id')
112            source = items.pop('source')
113        except KeyError as e:
114            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}")
115        if items.keys():
116            raise RuntimeError(
117                f"unsupported options directives in block at line {token.map[0] + 1}",
118                " ".join(items.keys()))
119
120        try:
121            with open(self._base_paths[-1].parent / source, 'r') as f:
122                token.meta['id-prefix'] = id_prefix
123                token.meta['list-id'] = varlist_id
124                token.meta['source'] = json.load(f)
125        except Exception as e:
126            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e
127
class RendererMixin(Renderer):
    """Renderer mixin that adds rules for the `included_*` tokens of manuals.

    Concrete renderers provide `_render_book`, `_included_thing` and
    `included_options`.
    """

    # structural tag the first-level heading of the rendered document maps to
    _toplevel_tag: str
    _revision: str

    def __init__(self, toplevel_tag: str, revision: str, *args: Any, **kwargs: Any):
        super().__init__(*args, **kwargs)
        self._toplevel_tag = toplevel_tag
        self._revision = revision

        def structural_rule(tag: str) -> Any:
            # bind `tag` per rule; the rule's own arguments are passed through as-is
            return lambda *rule_args: self._included_thing(tag, *rule_args)

        structural_tags = {
            'included_sections': "section",
            'included_chapters': "chapter",
            'included_preface': "preface",
            'included_parts': "part",
            'included_appendix': "appendix",
        }
        include_rules = {rule: structural_rule(tag) for rule, tag in structural_tags.items()}
        include_rules['included_options'] = self.included_options
        self.rules |= include_rules

    def render(self, tokens: Sequence[Token]) -> str:
        """Render `tokens`, delegating to `_render_book` when the toplevel tag is `book`."""
        # books get special handling because they have *two* title tags. doing this with
        # generic code is more complicated than it's worth. the checks above have verified
        # that both titles actually exist.
        if self._toplevel_tag != 'book':
            return super().render(tokens)

        return self._render_book(tokens)

    @abstractmethod
    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render a complete book from its token stream."""
        raise NotImplementedError()

    @abstractmethod
    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render a structural include token whose fragments have toplevel tag `tag`."""
        raise NotImplementedError()

    @abstractmethod
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render an `included_options` token."""
        raise NotImplementedError()
165
class ManualDocBookRenderer(RendererMixin, DocBookRenderer):
    """DocBook renderer for manuals, including structural and options includes."""

    def __init__(self, toplevel_tag: str, revision: str, manpage_urls: Mapping[str, str]):
        super().__init__(toplevel_tag, revision, manpage_urls)

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render the `<book>` element: title, subtitle, then everything from token 6 on."""
        title_inlines = tokens[1].children
        assert title_inlines
        subtitle_inlines = tokens[4].children
        assert subtitle_inlines
        book_id = cast(str, tokens[0].attrs.get('id', ""))
        id_attr = "xml:id=" + xml.quoteattr(book_id) if book_id else book_id
        # render in document order: title, subtitle, body
        title = self.renderInline(title_inlines)
        subtitle = self.renderInline(subtitle_inlines)
        body = super(DocBookRenderer, self).render(tokens[6:])
        return "".join([
            '<book xmlns="http://docbook.org/ns/docbook"',
            '      xmlns:xlink="http://www.w3.org/1999/xlink"',
            f'      {id_attr} version="5.0">',
            f'  <title>{title}</title>',
            f'  <subtitle>{subtitle}</subtitle>',
            f'  {body}',
            '</book>',
        ])

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        """Map `h1` to the toplevel tag with namespace attributes; defer to the base otherwise."""
        tag, attrs = super()._heading_tag(token, tokens, i)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag == 'h1':
            namespaces = {
                'xmlns': "http://docbook.org/ns/docbook",
                'xmlns:xlink': "http://www.w3.org/1999/xlink",
            }
            return (self._toplevel_tag, attrs | namespaces)
        return (tag, attrs)

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render every fragment of a structural include with a fresh renderer for `tag`."""
        pieces = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            pieces.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        sub_renderer = ManualDocBookRenderer(tag, self._revision, self._manpage_urls)
        for fragment, path in token.meta['included']:
            try:
                pieces.append(sub_renderer.render(fragment))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        return "".join(pieces)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the options loaded for this token as a DocBook fragment."""
        meta = token.meta
        converter = options.DocBookConverter(self._manpage_urls, self._revision, 'fragment',
                                             meta['list-id'], meta['id-prefix'])
        converter.add_options(meta['source'])
        return converter.finalize(fragment=True)

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        opening = super().paragraph_open(token, tokens, i)
        return opening + "\n "

    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        closing = super().paragraph_close(token, tokens, i)
        return "\n" + closing

    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<programlisting>\n" + xml.escape(token.content) + "</programlisting>"

    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # only emit a language attribute when the fence carries an info string
        if token.info != "":
            language = f" language={xml.quoteattr(token.info)}"
        else:
            language = ""
        return f"<programlisting{language}>\n{xml.escape(token.content)}</programlisting>"
226
class DocBookConverter(BaseConverter[ManualDocBookRenderer]):
    """Manual converter that renders a book with `ManualDocBookRenderer`."""

    # include arguments of the form `docbook:<name>=<value>` belong to this converter
    INCLUDE_ARGS_NS = "docbook"

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        super().__init__()
        # the root document of a manual is always rendered as a book
        renderer = ManualDocBookRenderer('book', revision, manpage_urls)
        self._renderer = renderer
233
234
class HTMLParameters(NamedTuple):
    """Settings consumed by the HTML manual renderer."""

    # value of the `generator` meta tag written into every file header
    generator: str
    # hrefs emitted as stylesheet links in every file header
    stylesheets: Sequence[str]
    # srcs emitted as script tags in every file header
    scripts: Sequence[str]
    # number of levels in the rendered table of contents. tables are prepended to
    # the content they apply to (entire document / document chunk / top-level section
    # of a chapter), setting a depth of 0 omits the respective table.
    toc_depth: int
    chunk_toc_depth: int
    section_toc_depth: int
    # directory (relative to the output directory) that pulled images are written to
    media_dir: Path
246
class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
    """HTML renderer for manuals.

    Besides the document body it emits per-file headers and footers with
    navigation, tables of contents, and writes fragments that were redirected
    with `into-file` to their own files below the output directory.
    """

    # absolute output directory; redirected files and media are written below it
    _base_path: Path
    # directory of the input file currently being rendered
    _in_dir: Path
    _html_params: HTMLParameters

    def __init__(self, toplevel_tag: str, revision: str, html_params: HTMLParameters,
                 manpage_urls: Mapping[str, str], xref_targets: dict[str, XrefTarget],
                 in_dir: Path, base_path: Path):
        super().__init__(toplevel_tag, revision, manpage_urls, xref_targets)
        self._html_params = html_params
        self._base_path = base_path.absolute()
        self._in_dir = in_dir

    def _pull_image(self, src: str) -> str:
        """Copy the image `src` into the media directory and return its relative URL."""
        source = Path(src)
        data = (self._in_dir / source).read_bytes()
        # images may be used more than once, but we want to store them only once and
        # in an easily accessible (ie, not input-file-path-dependent) location without
        # having to maintain a mapping structure. naming the stored file after the hash
        # of its content provides both.
        media_dir = self._html_params.media_dir
        file_name = hashlib.sha3_256(data).hexdigest() + source.suffix
        (self._base_path / media_dir / file_name).write_bytes(data)
        return f"./{media_dir}/{file_name}"

    def _push(self, tag: str, hlevel_offset: int) -> Any:
        """Start a nested rendering context and return the state to hand to `_pop`."""
        saved = (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset, self._in_dir)
        self._toplevel_tag = tag
        self._headings = []
        self._attrspans = []
        self._hlevel_offset += hlevel_offset
        return saved

    def _pop(self, state: Any) -> None:
        """Restore the rendering context saved by `_push`."""
        toplevel_tag, headings, attrspans, hlevel_offset, in_dir = state
        self._toplevel_tag = toplevel_tag
        self._headings = headings
        self._attrspans = attrspans
        self._hlevel_offset = hlevel_offset
        self._in_dir = in_dir

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render the book file: header, title page, toc, body and footer."""
        subtitle_inlines = tokens[4].children
        assert subtitle_inlines
        book_id = cast(str, tokens[0].attrs.get('id', ""))
        title = self._xref_targets[book_id].title
        # subtitles don't have IDs, so we can't use xrefs to get them
        subtitle = self.renderInline(subtitle_inlines)

        toc = TocEntry.of(tokens[0])
        header = self._file_header(toc)
        titlepage = [
            ' <div class="book">',
            '  <div class="titlepage">',
            '   <div>',
            f'   <div><h1 class="title"><a id="{html.escape(book_id, True)}"></a>{title}</h1></div>',
            f'   <div><h2 class="subtitle">{subtitle}</h2></div>',
            '   </div>',
            "   <hr />",
            '  </div>',
        ]
        toc_html = self._build_toc(tokens, 0)
        body = super(HTMLRenderer, self).render(tokens[6:])
        footer = self._file_footer(toc)
        return "\n".join([header, *titlepage, toc_html, body, ' </div>', footer])

    def _file_header(self, toc: TocEntry) -> str:
        """Build the document prologue, `<head>` and navigation header for `toc`'s file."""
        prev, parent, nxt, home = toc.prev, toc.parent, toc.next, toc.root
        prev_link = up_link = next_link = ""
        prev_a = next_a = ""
        parent_title = "&nbsp;"
        if prev:
            prev_link = f'<link rel="prev" href="{prev.target.href()}" title="{prev.target.title}" />'
            prev_a = f'<a accesskey="p" href="{prev.target.href()}">Prev</a>'
        if parent:
            up_link = f'<link rel="up" href="{parent.target.href()}" title="{parent.target.title}" />'
            # the book title is already shown in the first header row
            if parent.kind != 'book':
                assert parent.target.title
                parent_title = parent.target.title
        if nxt:
            next_link = f'<link rel="next" href="{nxt.target.href()}" title="{nxt.target.title}" />'
            next_a = f'<a accesskey="n" href="{nxt.target.href()}">Next</a>'

        nav_html = ""
        if prev or parent or nxt:
            nav_rows = [
                '  <div class="navheader">',
                '   <table width="100%" summary="Navigation header">',
                '    <tr>',
                f'    <th colspan="3" align="center">{toc.target.title}</th>',
                '    </tr>',
                '    <tr>',
                f'    <td width="20%" align="left">{prev_a}&nbsp;</td>',
                f'    <th width="60%" align="center">{parent_title}</th>',
                f'    <td width="20%" align="right">&nbsp;{next_a}</td>',
                '    </tr>',
                '   </table>',
                '   <hr />',
                '  </div>',
            ]
            nav_html = "\n".join(nav_rows)

        page_title = f' <title>{toc.target.title}</title>'
        stylesheet_links = "".join(
            f'<link rel="stylesheet" type="text/css" href="{html.escape(style, True)}" />'
            for style in self._html_params.stylesheets
        )
        script_tags = "".join(
            f'<script src="{html.escape(script, True)}" type="text/javascript"></script>'
            for script in self._html_params.scripts
        )
        generator_meta = f' <meta name="generator" content="{html.escape(self._html_params.generator, True)}" />'
        # the home link is only emitted when the root has a non-empty href
        if home.target.href():
            home_link = f' <link rel="home" href="{home.target.href()}" title="{home.target.title}" />'
        else:
            home_link = ""
        return "\n".join([
            '<?xml version="1.0" encoding="utf-8" standalone="no"?>',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
            '  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
            '<html xmlns="http://www.w3.org/1999/xhtml">',
            ' <head>',
            '  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            page_title,
            stylesheet_links,
            script_tags,
            generator_meta,
            home_link,
            f' {up_link}{prev_link}{next_link}',
            ' </head>',
            ' <body>',
            nav_html,
        ])

    def _file_footer(self, toc: TocEntry) -> str:
        """Build the navigation footer and the closing tags for `toc`'s file."""
        prev, parent, nxt, home = toc.prev, toc.parent, toc.next, toc.root
        prev_a = next_a = ""
        up_a = home_a = "&nbsp;"
        prev_text = next_text = ""
        if prev:
            prev_a = f'<a accesskey="p" href="{prev.target.href()}">Prev</a>'
            assert prev.target.title
            prev_text = prev.target.title
        if parent:
            home_a = f'<a accesskey="h" href="{home.target.href()}">Home</a>'
            # "Up" would duplicate "Home" when the parent is the root itself
            if parent != home:
                up_a = f'<a accesskey="u" href="{parent.target.href()}">Up</a>'
        if nxt:
            next_a = f'<a accesskey="n" href="{nxt.target.href()}">Next</a>'
            assert nxt.target.title
            next_text = nxt.target.title

        nav_html = ""
        if prev or parent or nxt:
            nav_rows = [
                '  <div class="navfooter">',
                '   <hr />',
                '   <table width="100%" summary="Navigation footer">',
                '    <tr>',
                f'    <td width="40%" align="left">{prev_a}&nbsp;</td>',
                f'    <td width="20%" align="center">{up_a}</td>',
                f'    <td width="40%" align="right">&nbsp;{next_a}</td>',
                '    </tr>',
                '    <tr>',
                f'     <td width="40%" align="left" valign="top">{prev_text}&nbsp;</td>',
                f'     <td width="20%" align="center">{home_a}</td>',
                f'     <td width="40%" align="right" valign="top">&nbsp;{next_text}</td>',
                '    </tr>',
                '   </table>',
                '  </div>',
            ]
            nav_html = "\n".join(nav_rows)
        return "\n".join([nav_html, ' </body>', '</html>'])

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Use the toplevel tag for `h1` headings and the inherited mapping otherwise."""
        if token.tag != 'h1':
            return super()._heading_tag(token, tokens, i)
        return self._toplevel_tag

    def _build_toc(self, tokens: Sequence[Token], i: int) -> str:
        """Render the table of contents (plus figure/example lists) for `tokens[i]`.

        Returns an empty string when the applicable depth yields no entries.
        """
        toc = TocEntry.of(tokens[i])
        params = self._html_params
        is_section = toc.kind == 'section'
        if is_section and params.section_toc_depth < 1:
            return ""

        def walk_and_emit(entry: TocEntry, depth: int) -> list[str]:
            emitted: list[str] = []
            if depth <= 0:
                return emitted
            for child in entry.children:
                emitted.append(
                    f'<dt> <span class="{html.escape(child.kind, True)}">'
                    f'  <a href="{child.target.href()}">{child.target.toc_html}</a>'
                    ' </span></dt>'
                )
                # we want to look straight through parts because docbook-xsl does too, but it
                # also makes for more useful top-level tocs.
                child_depth = depth if child.kind == 'part' else depth - 1
                nested = walk_and_emit(child, child_depth)
                if nested:
                    emitted.append(f'<dd><dl>{"".join(nested)}</dl></dd>')
            return emitted

        def build_list(kind: str, css_class: str, entries: Sequence[TocEntry]) -> str:
            if not entries:
                return ""
            rows = "".join(
                f'<dt>{number}. <a href="{entry.target.href()}">{entry.target.toc_html}</a></dt>'
                for number, entry in enumerate(entries, start=1)
            )
            return f'<div class="{css_class}"><p><strong>List of {kind}</strong></p><dl>{rows}</dl></div>'

        # we don't want to generate the "Table of Contents" header for sections,
        # docbook doesn't and it's only distracting clutter unless it's the main table.
        # we also want to generate tocs only for a top-level section (ie, one that is
        # not itself contained in another section)
        if is_section:
            inside_section = toc.parent and toc.parent.kind == 'section'
            toc_depth = 0 if inside_section else params.section_toc_depth
        elif toc.starts_new_chunk and toc.kind != 'book':
            toc_depth = params.chunk_toc_depth
        else:
            toc_depth = params.toc_depth
        items = walk_and_emit(toc, toc_depth)
        if not items:
            return ""
        figures = build_list("Figures", "list-of-figures", toc.figures)
        examples = build_list("Examples", "list-of-examples", toc.examples)
        heading = "" if is_section else ' <p><strong>Table of Contents</strong></p>'
        return (
            f'<div class="toc">{heading}'
            f' <dl class="toc">  {"".join(items)} </dl></div>'
            f'{figures}{examples}'
        )

    def _make_hN(self, level: int) -> tuple[str, str]:
        """Return the heading tag for `level` together with the docbook-compat style."""
        toplevel = self._toplevel_tag
        # for some reason chapters don't increase the hN nesting count in docbook xslts. duplicate
        # this for consistency.
        if toplevel == 'chapter':
            level -= 1
        # TODO docbook compat. these are never useful for us, but not having them breaks manual
        # compare workflows while docbook is still allowed.
        is_shallow = level + self._hlevel_offset < 3
        wants_clear = toplevel == 'section' or (toplevel == 'chapter' and level > 0)
        style = "clear: both" if is_shallow and wants_clear else ""
        # only the tag of the inherited result is used; its style is replaced by ours
        tag, _inherited_style = super()._make_hN(max(1, level))
        return tag, style

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the fragments of a structural include.

        Without `into-file` the rendered fragments are returned inline. With it they
        are written, wrapped in file header and footer, to that file below the output
        directory and only the partintro handling output is returned.
        """
        # since books have no non-include content the toplevel book wrapper will not count
        # towards nesting depth. other types will have at least a title+id heading which
        # *does* count towards the nesting depth. chapters give a -1 to included sections
        # mirroring the special handing in _make_hN. sigh.
        if not self._headings:
            hoffset = 0
        elif self._toplevel_tag == 'chapter':
            hoffset = self._headings[-1].level - 1
        else:
            hoffset = self._headings[-1].level
        outer = [self._maybe_close_partintro()]
        into = token.meta['include-args'].get('into-file')
        fragments = token.meta['included']
        state = self._push(tag, hoffset)
        if into:
            # the first token of the first fragment identifies the redirected file
            toc = TocEntry.of(fragments[0][0][0])
            # we do not set _hlevel_offset=0 because docbook doesn't either.
            inner = [self._file_header(toc)]
        else:
            inner = outer
        parent_dir = self._in_dir
        for fragment, path in fragments:
            try:
                self._in_dir = (parent_dir / path).parent
                rendered = self.render(fragment)
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
            inner.append(rendered)
        if into:
            inner.append(self._file_footer(toc))
            (self._base_path / into).write_text("".join(inner))
        self._pop(state)
        return "".join(outer)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the options loaded for this token as HTML."""
        meta = token.meta
        converter = options.HTMLConverter(self._manpage_urls, self._revision,
                                          meta['list-id'], meta['id-prefix'],
                                          self._xref_targets)
        converter.add_options(meta['source'])
        return converter.finalize()
525
526def _to_base26(n: int) -> str:
527    return (_to_base26(n // 26) if n > 26 else "") + chr(ord("A") + n % 26)
528
529class HTMLConverter(BaseConverter[ManualHTMLRenderer]):
530    INCLUDE_ARGS_NS = "html"
531    INCLUDE_FRAGMENT_ALLOWED_ARGS = { 'into-file' }
532
533    _revision: str
534    _html_params: HTMLParameters
535    _manpage_urls: Mapping[str, str]
536    _xref_targets: dict[str, XrefTarget]
537    _redirection_targets: set[str]
538    _appendix_count: int = 0
539
540    def _next_appendix_id(self) -> str:
541        self._appendix_count += 1
542        return _to_base26(self._appendix_count - 1)
543
544    def __init__(self, revision: str, html_params: HTMLParameters, manpage_urls: Mapping[str, str]):
545        super().__init__()
546        self._revision, self._html_params, self._manpage_urls = revision, html_params, manpage_urls
547        self._xref_targets = {}
548        self._redirection_targets = set()
549        # renderer not set on purpose since it has a dependency on the output path!
550
551    def convert(self, infile: Path, outfile: Path) -> None:
552        self._renderer = ManualHTMLRenderer(
553            'book', self._revision, self._html_params, self._manpage_urls, self._xref_targets,
554            infile.parent, outfile.parent)
555        super().convert(infile, outfile)
556
557    def _parse(self, src: str) -> list[Token]:
558        tokens = super()._parse(src)
559        for token in tokens:
560            if not token.type.startswith('included_') \
561               or not (into := token.meta['include-args'].get('into-file')):
562                continue
563            assert token.map
564            if len(token.meta['included']) == 0:
565                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is empty!")
566            # we use blender-style //path to denote paths relative to the origin file
567            # (usually index.html). this makes everything a lot easier and clearer.
568            if not into.startswith("//") or '/' in into[2:]:
569                raise RuntimeError("html:into-file must be a relative-to-origin //filename", into)
570            into = token.meta['include-args']['into-file'] = into[2:]
571            if into in self._redirection_targets:
572                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is already in use")
573            self._redirection_targets.add(into)
574        return tokens
575
576    def _number_block(self, block: str, prefix: str, tokens: Sequence[Token], start: int = 1) -> int:
577        title_open, title_close = f'{block}_title_open', f'{block}_title_close'
578        for (i, token) in enumerate(tokens):
579            if token.type == title_open:
580                title = tokens[i + 1]
581                assert title.type == 'inline' and title.children
582                # the prefix is split into two tokens because the xref title_html will want
583                # only the first of the two, but both must be rendered into the example itself.
584                title.children = (
585                    [
586                        Token('text', '', 0, content=f'{prefix} {start}'),
587                        Token('text', '', 0, content='. ')
588                    ] + title.children
589                )
590                start += 1
591            elif token.type.startswith('included_') and token.type != 'included_options':
592                for sub, _path in token.meta['included']:
593                    start = self._number_block(block, prefix, sub, start)
594        return start
595
596    # xref | (id, type, heading inlines, file, starts new file)
597    def _collect_ids(self, tokens: Sequence[Token], target_file: str, typ: str, file_changed: bool
598                     ) -> list[XrefTarget | tuple[str, str, Token, str, bool]]:
599        result: list[XrefTarget | tuple[str, str, Token, str, bool]] = []
600        # collect all IDs and their xref substitutions. headings are deferred until everything
601        # has been parsed so we can resolve links in headings. if that's even used anywhere.
602        for (i, bt) in enumerate(tokens):
603            if bt.type == 'heading_open' and (id := cast(str, bt.attrs.get('id', ''))):
604                result.append((id, typ if bt.tag == 'h1' else 'section', tokens[i + 1], target_file,
605                               i == 0 and file_changed))
606            elif bt.type == 'included_options':
607                id_prefix = bt.meta['id-prefix']
608                for opt in bt.meta['source'].keys():
609                    id = make_xml_id(f"{id_prefix}{opt}")
610                    name = html.escape(opt)
611                    result.append(XrefTarget(id, f'<code class="option">{name}</code>', name, None, target_file))
612            elif bt.type.startswith('included_'):
613                sub_file = bt.meta['include-args'].get('into-file', target_file)
614                subtyp = bt.type.removeprefix('included_').removesuffix('s')
615                for si, (sub, _path) in enumerate(bt.meta['included']):
616                    result += self._collect_ids(sub, sub_file, subtyp, si == 0 and sub_file != target_file)
617            elif bt.type == 'example_open' and (id := cast(str, bt.attrs.get('id', ''))):
618                result.append((id, 'example', tokens[i + 2], target_file, False))
619            elif bt.type == 'figure_open' and (id := cast(str, bt.attrs.get('id', ''))):
620                result.append((id, 'figure', tokens[i + 2], target_file, False))
621            elif bt.type == 'footnote_open' and (id := cast(str, bt.attrs.get('id', ''))):
622                result.append(XrefTarget(id, "???", None, None, target_file))
623            elif bt.type == 'footnote_ref' and (id := cast(str, bt.attrs.get('id', ''))):
624                result.append(XrefTarget(id, "???", None, None, target_file))
625            elif bt.type == 'inline':
626                assert bt.children is not None
627                result += self._collect_ids(bt.children, target_file, typ, False)
628            elif id := cast(str, bt.attrs.get('id', '')):
629                # anchors and examples have no titles we could use, but we'll have to put
630                # *something* here to communicate that there's no title.
631                result.append(XrefTarget(id, "???", None, None, target_file))
632        return result
633
634    def _render_xref(self, id: str, typ: str, inlines: Token, path: str, drop_fragment: bool) -> XrefTarget:
635        assert inlines.children
636        title_html = self._renderer.renderInline(inlines.children)
637        if typ == 'appendix':
638            # NOTE the docbook compat is strong here
639            n = self._next_appendix_id()
640            prefix = f"Appendix\u00A0{n}.\u00A0"
641            # HACK for docbook compat: prefix the title inlines with appendix id if
642            # necessary. the alternative is to mess with titlepage rendering in headings,
643            # which seems just a lot worse than this
644            prefix_tokens = [Token(type='text', tag='', nesting=0, content=prefix)]
645            inlines.children = prefix_tokens + list(inlines.children)
646            title = prefix + title_html
647            toc_html = f"{n}. {title_html}"
648            title_html = f"Appendix&nbsp;{n}"
649        elif typ in ['example', 'figure']:
650            # skip the prepended `{Example,Figure} N. ` from numbering
651            toc_html, title = self._renderer.renderInline(inlines.children[2:]), title_html
652            # xref title wants only the prepended text, sans the trailing colon and space
653            title_html = self._renderer.renderInline(inlines.children[0:1])
654        else:
655            toc_html, title = title_html, title_html
656            title_html = (
657                f"<em>{title_html}</em>"
658                if typ == 'chapter'
659                else title_html if typ in [ 'book', 'part' ]
660                else f'the section called “{title_html}”'
661            )
662        return XrefTarget(id, title_html, toc_html, re.sub('<.*?>', '', title), path, drop_fragment)
663
664    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
665        self._number_block('example', "Example", tokens)
666        self._number_block('figure', "Figure", tokens)
667        xref_queue = self._collect_ids(tokens, outfile.name, 'book', True)
668
669        failed = False
670        deferred = []
671        while xref_queue:
672            for item in xref_queue:
673                try:
674                    target = item if isinstance(item, XrefTarget) else self._render_xref(*item)
675                except UnresolvedXrefError:
676                    if failed:
677                        raise
678                    deferred.append(item)
679                    continue
680
681                if target.id in self._xref_targets:
682                    raise RuntimeError(f"found duplicate id #{target.id}")
683                self._xref_targets[target.id] = target
684            if len(deferred) == len(xref_queue):
685                failed = True # do another round and report the first error
686            xref_queue = deferred
687
688        paths_seen = set()
689        for t in self._xref_targets.values():
690            paths_seen.add(t.path)
691
692        if len(paths_seen) == 1:
693            for (k, t) in self._xref_targets.items():
694                self._xref_targets[k] = XrefTarget(
695                    t.id,
696                    t.title_html,
697                    t.toc_html,
698                    t.title,
699                    t.path,
700                    t.drop_fragment,
701                    drop_target=True
702                )
703
704        TocEntry.collect_and_link(self._xref_targets, tokens)
705
706
707
def _build_cli_db(p: argparse.ArgumentParser) -> None:
    """Register the command line arguments of the docbook renderer on `p`.

    Both `--manpage-urls` and `--revision` are mandatory options; `infile` and
    `outfile` are positional and parsed as `Path`s.
    """
    for required_flag in ('--manpage-urls', '--revision'):
        p.add_argument(required_flag, required=True)
    for positional in ('infile', 'outfile'):
        p.add_argument(positional, type=Path)
713
def _build_cli_html(p: argparse.ArgumentParser) -> None:
    """Register the command line arguments of the html renderer on `p`.

    `--manpage-urls` and `--revision` are mandatory. `--stylesheet` and
    `--script` may be given multiple times and accumulate into lists. The
    three toc depth options are integers, `--media-dir` and the positional
    `infile`/`outfile` are parsed as `Path`s.
    """
    for required_flag in ('--manpage-urls', '--revision'):
        p.add_argument(required_flag, required=True)

    p.add_argument('--generator', default='nixos-render-docs')

    for repeatable_flag in ('--stylesheet', '--script'):
        p.add_argument(repeatable_flag, default=[], action='append')

    depth_defaults = {
        '--toc-depth': 1,
        '--chunk-toc-depth': 1,
        '--section-toc-depth': 0,
    }
    for depth_flag, depth in depth_defaults.items():
        p.add_argument(depth_flag, default=depth, type=int)

    p.add_argument('--media-dir', default="media", type=Path)

    for positional in ('infile', 'outfile'):
        p.add_argument(positional, type=Path)
726
def _run_cli_db(args: argparse.Namespace) -> None:
    """Run the docbook conversion described by the parsed arguments `args`.

    The file named by `args.manpage_urls` is read as JSON and handed to the
    converter together with `args.revision`; the converter then processes
    `args.infile` into `args.outfile`.
    """
    with open(args.manpage_urls, 'r') as urls_file:
        manpage_urls = json.load(urls_file)
        converter = DocBookConverter(manpage_urls, args.revision)
        converter.convert(args.infile, args.outfile)
731
def _run_cli_html(args: argparse.Namespace) -> None:
    """Run the html conversion described by the parsed arguments `args`.

    The rendering options from `args` are bundled into `HTMLParameters`, the
    file named by `args.manpage_urls` is read as JSON, and the resulting
    converter processes `args.infile` into `args.outfile`.
    """
    with open(args.manpage_urls, 'r') as urls_file:
        params = HTMLParameters(
            args.generator,
            args.stylesheet,
            args.script,
            args.toc_depth,
            args.chunk_toc_depth,
            args.section_toc_depth,
            args.media_dir,
        )
        converter = HTMLConverter(args.revision, params, json.load(urls_file))
        converter.convert(args.infile, args.outfile)
740
def build_cli(p: argparse.ArgumentParser) -> None:
    """Add one mandatory subcommand per output format to `p`.

    The chosen subcommand name is stored in the `format` attribute of the
    parsed namespace, which `run_cli` dispatches on.
    """
    subcommands = p.add_subparsers(dest='format', required=True)
    for name, configure in (('docbook', _build_cli_db), ('html', _build_cli_html)):
        configure(subcommands.add_parser(name))
745
def run_cli(args: argparse.Namespace) -> None:
    """Dispatch to the runner matching `args.format`.

    Raises:
        RuntimeError: if `args.format` names a format no runner is hooked up for.
    """
    fmt = args.format
    if fmt == 'docbook':
        _run_cli_db(args)
        return
    if fmt == 'html':
        _run_cli_html(args)
        return
    raise RuntimeError('format not hooked up', args)