from collections.abc import Mapping, Sequence
from typing import cast, Optional, NamedTuple

from markdown_it.token import Token
from xml.sax.saxutils import escape, quoteattr

from .md import Renderer

# xml ids may not contain these characters; replace them with underscores.
_xml_id_translate_table = {
    ord('*'): ord('_'),
    ord('<'): ord('_'),
    ord(' '): ord('_'),
    ord('>'): ord('_'),
    ord('['): ord('_'),
    ord(']'): ord('_'),
    ord(':'): ord('_'),
    ord('"'): ord('_'),
}
def make_xml_id(s: str) -> str:
    return s.translate(_xml_id_translate_table)

class Deflist:
    has_dd = False

class Heading(NamedTuple):
    container_tag: str
    level: int
    # special handling for <part> titles: whether the partintro was already closed from elsewhere
    # or still needs closing.
    partintro_closed: bool = False

class DocBookRenderer(Renderer):
    _link_tags: list[str]
    _deflists: list[Deflist]
    _headings: list[Heading]
    _attrspans: list[str]

    def __init__(self, manpage_urls: Mapping[str, str]):
        super().__init__(manpage_urls)
        self._link_tags = []
        self._deflists = []
        self._headings = []
        self._attrspans = []

    def render(self, tokens: Sequence[Token]) -> str:
        result = super().render(tokens)
        result += self._close_headings(None)
        return result
    def renderInline(self, tokens: Sequence[Token]) -> str:
        # HACK to support docbook links and xrefs. link handling is only necessary because the
        # docbook manpage stylesheet converts - in urls to a mathematical minus, which may be
        # somewhat incorrect.
        for i, token in enumerate(tokens):
            if token.type != 'link_open':
                continue
            token.tag = 'link'
            # turn [](#foo) into xrefs
            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close':  # type: ignore[index]
                token.tag = "xref"
            # turn <x> into links without contents
            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
                tokens[i + 1].content = ''

        return super().renderInline(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para>"
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</para>"
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<literallayout>\n</literallayout>"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # should check options.breaks() and emit a hard break if set
        return "\n"
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<literal>{escape(token.content)}</literal>"
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>{escape(token.content)}</programlisting>"
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._link_tags.append(token.tag)
        href = cast(str, token.attrs['href'])
        (attr, start) = ('linkend', 1) if href[0] == '#' else ('xlink:href', 0)
        return f"<{token.tag} {attr}={quoteattr(href[start:])}>"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"</{self._link_tags.pop()}>"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<listitem>"
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem>\n"
    # HACK: open and close a para around lists to keep the docbook change (diff) small. remove soon.
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<para><itemizedlist{spacing}>\n"
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n</itemizedlist></para>"
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis>"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis role=\"strong\">"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><blockquote>"
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</blockquote></para>"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><note>"
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</note></para>"
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><caution>"
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</caution></para>"
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><important>"
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</important></para>"
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><tip>"
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</tip></para>"
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><warning>"
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</warning></para>"
    # markdown-it emits tokens based on the html syntax tree, but docbook is
    # slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
    # docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}</variablelist>.
    # we have to reject multiple definitions for the same term for the time being.
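    # illustration (not from the original source): assuming markdown-it's deflist extension
    # supplies the dl/dt/dd tokens, a definition list such as
    #
    #   term
    #   :   definition
    #
    # is emitted (modulo whitespace) as
    #
    #   <para><variablelist>
    #     <varlistentry>
    #       <term>term</term>
    #       <listitem><para>definition</para></listitem>
    #     </varlistentry>
    #   </variablelist></para>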
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.append(Deflist())
        return "<para><variablelist>"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.pop()
        return "</variablelist></para>"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists[-1].has_dd = False
        return "<varlistentry><term>"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</term>"
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if self._deflists[-1].has_dd:
            raise Exception("multiple definitions per term not supported")
        self._deflists[-1].has_dd = True
        return "<listitem>"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem></varlistentry>"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] == 'command':
            return f"<command>{escape(token.content)}</command>"
        if token.meta['name'] == 'file':
            return f"<filename>{escape(token.content)}</filename>"
        if token.meta['name'] == 'var':
            return f"<varname>{escape(token.content)}</varname>"
        if token.meta['name'] == 'env':
            return f"<envar>{escape(token.content)}</envar>"
        if token.meta['name'] == 'option':
            return f"<option>{escape(token.content)}</option>"
        if token.meta['name'] == 'manpage':
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            man = f"{page}({section})"
            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
            vol = f"<manvolnum>{escape(section)}</manvolnum>"
            ref = f"<citerefentry>{title}{vol}</citerefentry>"
            if man in self._manpage_urls:
                return f"<link xlink:href={quoteattr(self._manpage_urls[man])}>{ref}</link>"
            else:
                return ref
        raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # we currently support *only* inline anchors and the special .keycap class to produce
        # <keycap> docbook elements.
        (id_part, class_part) = ("", "")
        if s := token.attrs.get('id'):
            id_part = f'<anchor xml:id={quoteattr(cast(str, s))} />'
        if s := token.attrs.get('class'):
            if s == 'keycap':
                class_part = "<keycap>"
                self._attrspans.append("</keycap>")
            else:
                return super().attr_span_begin(token, tokens, i)
        else:
            self._attrspans.append("")
        return id_part + class_part
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._attrspans.pop()
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<orderedlist{start}{spacing}>"
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</orderedlist>"
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        hlevel = int(token.tag[1:])
        result = self._close_headings(hlevel)
        (tag, attrs) = self._heading_tag(token, tokens, i)
        self._headings.append(Heading(tag, hlevel))
        attrs_str = "".join([ f" {k}={quoteattr(v)}" for k, v in attrs.items() ])
        return result + f'<{tag}{attrs_str}>\n<title>'
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        heading = self._headings[-1]
        result = '</title>'
        if heading.container_tag == 'part':
            # generate the same ids as were previously assigned manually. if this collides we
            # rely on outside schema validation to catch it!
            maybe_id = ""
            assert tokens[i - 2].type == 'heading_open'
            if id := cast(str, tokens[i - 2].attrs.get('id', "")):
                maybe_id = " xml:id=" + quoteattr(id + "-intro")
            result += f"<partintro{maybe_id}>"
        return result
    def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        id_part = ""
        if id := cast(str, token.attrs.get('id', '')):
            id_part = f' xml:id={quoteattr(id)}'
        return f"<example{id_part}>"
    def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</example>"
    def example_title_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<title>"
    def example_title_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</title>"

    def _close_headings(self, level: Optional[int]) -> str:
        # we rely on markdown-it producing h{1..6} tags in token.tag for this to work
        result: list[str] = []
        while self._headings:
            if level is None or self._headings[-1].level >= level:
                heading = self._headings.pop()
                if heading.container_tag == 'part' and not heading.partintro_closed:
                    result.append("</partintro>")
                result.append(f"</{heading.container_tag}>")
            else:
                break
        return "\n".join(result)

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        attrs: dict[str, str] = {}
        if id := token.attrs.get('id'):
            attrs['xml:id'] = cast(str, id)
        return ("section", attrs)
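
# usage sketch (illustrative, not part of the module): the renderer is driven by
# markdown-it-py tokens. assuming the base Renderer from .md dispatches token types
# to the methods above, a conversion could look roughly like this; the plain
# MarkdownIt() below is an assumption for brevity, the real pipeline enables further
# extensions (definition lists, attrs, MyST roles, admonitions) before tokens reach
# this class.
#
#     from markdown_it import MarkdownIt
#
#     tokens = MarkdownIt().parse("Fix it with *care*.")
#     out = DocBookRenderer(manpage_urls={}).render(tokens)
#     # out is roughly "<para>Fix it with <emphasis>care</emphasis>.</para>"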