from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import cast, Iterable, Optional

import re

from markdown_it.token import Token

from .md import Renderer

# roff(7) says:
#
# > roff documents may contain only graphable 7-bit ASCII characters, the space character,
# > and, in certain circumstances, the tab character. The backslash character ‘\’ indicates
# > the start of an escape sequence […]
#
# mandoc_char(7) says about the `'~^ characters:
#
# > In prose, this automatic substitution is often desirable; but when these characters have
# > to be displayed as plain ASCII characters, for example in source code samples, they require
# > escaping to render as follows:
#
# since we don't want these to be touched anywhere (because markdown will do all substitutions
# we want to have) we'll escape those as well. we also escape " (macro metacharacter), - (might
# turn into a typographic hyphen), and . (roff request marker at SOL, changes spacing semantics
# at EOL). groff additionally does not allow unicode escapes for codepoints below U+0080, so
# those need "proper" roff escapes/replacements instead.
_roff_unicode = re.compile(r'''[^\n !#$%&()*+,\-./0-9:;<=>?@A-Z[\\\]_a-z{|}]''', re.ASCII)
_roff_escapes = {
    ord('"'): "\\(dq",
    ord("'"): "\\(aq",
    ord('-'): "\\-",
    ord('.'): "\\&.",
    ord('\\'): "\\e",
    ord('^'): "\\(ha",
    ord('`'): "\\(ga",
    ord('~'): "\\(ti",
}
def man_escape(s: str) -> str:
    s = s.translate(_roff_escapes)
    return _roff_unicode.sub(lambda m: f"\\[u{ord(m[0]):04X}]", s)
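# a rough illustration of the escaping above (illustrative only, not a doctest):
#   man_escape("a-b.c é")  ==  r"a\-b\&.c \[u00E9]"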

# remove leading and trailing spaces from links and condense multiple consecutive spaces
# into a single space for presentation parity with html. this is currently easiest with
# regex postprocessing and some marker characters. since we don't want to drop spaces
# from code blocks we will have to specially protect *inline* code (luckily not block code)
# so normalization can turn the spaces inside it into regular spaces again.
_normalize_space_re = re.compile(r'''\u0000 < *| *>\u0000 |(?<= ) +''')
def _normalize_space(s: str) -> str:
    return _normalize_space_re.sub("", s).replace("\0p", " ")

def _protect_spaces(s: str) -> str:
    return s.replace(" ", "\0p")
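# a rough illustration of the round trip (illustrative only, not a doctest):
#   _normalize_space("a  b")                   == "a b"   # ordinary space runs are condensed
#   _normalize_space(_protect_spaces("a  b"))  == "a  b"  # protected inline-code spaces survive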

@dataclass(kw_only=True)
class List:
    width: int
    next_idx: Optional[int] = None
    compact: bool
    first_item_seen: bool = False

# this renderer assumes that it produces a set of lines as output, and that those lines will
# be pasted as-is into a larger output. no prefixing or suffixing is allowed for correctness.
#
# NOTE that we output exclusively physical markup. this is because we have to use the older
# man(7) format instead of the newer mdoc(7) format due to limitations in groff: while
# using mdoc in groff works fine it is not a native format and thus very slow to render on
# manpages as large as configuration.nix.5. mandoc(1) renders both really quickly, but with
# groff being our predominant manpage viewer we have to optimize for groff instead.
#
# while we do use only physical markup (adjusting indentation with .RS and .RE, adding
# vertical spacing with .sp, \f[BIRP] escapes for bold/italic/roman/$previous font, \h for
# horizontal motion in a line) we do attempt to copy the style of mdoc(7) semantic requests
# as appropriate for each markup element.
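#
# as a rough sketch (illustrative only, details may differ): a paragraph such as
# "**must** be set", when it is not the first block in its container, comes out as
#
#   .sp
#   \fBmust\fR be set
#
# where \fR restores the enclosing font from the font stack instead of using a semantic macro.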
class ManpageRenderer(Renderer):
    # whether to emit mdoc .Ql equivalents for inline code or just the contents. this is
    # mainly used by the options manpage converter to not emit extra quotes in defaults
    # and examples where it's already clear from context that the following text is code.
    inline_code_is_quoted: bool = True
    link_footnotes: Optional[list[str]] = None

    _href_targets: dict[str, str]

    _link_stack: list[str]
    _do_parbreak_stack: list[bool]
    _list_stack: list[List]
    _font_stack: list[str]

    def __init__(self, manpage_urls: Mapping[str, str], href_targets: dict[str, str]):
        super().__init__(manpage_urls)
        self._href_targets = href_targets
        self._link_stack = []
        self._do_parbreak_stack = []
        self._list_stack = []
        self._font_stack = []

    def _join_block(self, ls: Iterable[str]) -> str:
        return "\n".join([ l for l in ls if len(l) ])
    def _join_inline(self, ls: Iterable[str]) -> str:
        return _normalize_space(super()._join_inline(ls))

    def _enter_block(self) -> None:
        self._do_parbreak_stack.append(False)
    def _leave_block(self) -> None:
        self._do_parbreak_stack.pop()
        self._do_parbreak_stack[-1] = True
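    # note on the parbreak machinery: _enter_block pushes False, so the first block inside a
    # container emits no .sp while later siblings do, and _leave_block marks the parent so the
    # next sibling after the container gets a paragraph break again.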
    def _maybe_parbreak(self, suffix: str = "") -> str:
        result = f".sp{suffix}" if self._do_parbreak_stack[-1] else ""
        self._do_parbreak_stack[-1] = True
        return result

    def _admonition_open(self, kind: str) -> str:
        self._enter_block()
        return (
            '.sp\n'
            '.RS 4\n'
            f'\\fB{kind}\\fP\n'
            '.br'
        )
    def _admonition_close(self) -> str:
        self._leave_block()
        return ".RE"

    def render(self, tokens: Sequence[Token]) -> str:
        self._do_parbreak_stack = [ False ]
        self._font_stack = [ "\\fR" ]
        return super().render(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return man_escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._maybe_parbreak()
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".br"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return " "
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        s = _protect_spaces(man_escape(token.content))
        return f"\\fR\\(oq{s}\\(cq\\fP" if self.inline_code_is_quoted else s
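    # rough illustration (not a doctest): with inline_code_is_quoted left at True, the inline
    # code `foo bar` renders (after space normalization) as  \fR\(oqfoo bar\(cq\fP , i.e.
    # single-quoted roman text roughly equivalent to mdoc's .Ql.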
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self.fence(token, tokens, i)
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        href = cast(str, token.attrs['href'])
        self._link_stack.append(href)
        text = ""
        if tokens[i + 1].type == 'link_close' and href in self._href_targets:
            # TODO error or warning if the target can't be resolved
            text = self._href_targets[href]
        self._font_stack.append("\\fB")
        return f"\\fB{text}\0 <"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        href = self._link_stack.pop()
        text = ""
        if self.link_footnotes is not None:
            try:
                idx = self.link_footnotes.index(href) + 1
            except ValueError:
                self.link_footnotes.append(href)
                idx = len(self.link_footnotes)
            text = "\\fR" + man_escape(f"[{idx}]")
        self._font_stack.pop()
        return f">\0 {text}{self._font_stack[-1]}"
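    # rough illustration (assumed input, not a doctest): with link_footnotes set, a link like
    # [foo](https://example.org) renders (after space normalization) roughly as
    #   \fBfoo\fR[1]\fR
    # and the URL is appended to link_footnotes so the caller can emit it in a footnote list.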
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._enter_block()
        lst = self._list_stack[-1]
        maybe_space = '' if lst.compact or not lst.first_item_seen else '.sp\n'
        lst.first_item_seen = True
        head = "•"
        if lst.next_idx is not None:
            head = f"{lst.next_idx}."
            lst.next_idx += 1
        return (
            f'{maybe_space}'
            f'.RS {lst.width}\n'
            f"\\h'-{len(head) + 1}'\\fB{man_escape(head)}\\fP\\h'1'\\c"
        )
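    # rough illustration (not a doctest): the first item of a compact bullet list opens as
    #   .RS 4
    #   \h'-2'\fB\[u2022]\fP\h'1'\c
    # i.e. the bullet is drawn inside the indentation created by .RS and \c keeps the item
    # body on the same output line.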
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.append(List(width=4, compact=bool(token.meta['compact'])))
        return self._maybe_parbreak()
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.pop()
        return ""
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.append("\\fI")
        return "\\fI"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.pop()
        return self._font_stack[-1]
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.append("\\fB")
        return "\\fB"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.pop()
        return self._font_stack[-1]
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        s = man_escape(token.content).rstrip('\n')
        return (
            '.sp\n'
            '.RS 4\n'
            '.nf\n'
            f'{s}\n'
            '.fi\n'
            '.RE'
        )
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        maybe_par = self._maybe_parbreak("\n")
        self._enter_block()
        return (
            f"{maybe_par}"
            ".RS 4\n"
            "\\h'-3'\\fI\\(lq\\(rq\\fP\\h'1'\\c"
        )
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Note")
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Caution")
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Important")
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Tip")
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Warning")
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".RS 4"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".RE"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".PP"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._enter_block()
        return ".RS 4"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] in [ 'command', 'env', 'option' ]:
            return f'\\fB{man_escape(token.content)}\\fP'
        elif token.meta['name'] in [ 'file', 'var' ]:
            return f'\\fI{man_escape(token.content)}\\fP'
        elif token.meta['name'] == 'manpage':
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            return f'\\fB{man_escape(page)}\\fP\\fR({man_escape(section)})\\fP'
        else:
            raise NotImplementedError("md node not supported yet", token)
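    # rough illustration (not a doctest): {manpage}`nix.conf(5)` becomes
    #   \fBnix\&.conf\fP\fR(5)\fP
    # i.e. a bold page name followed by the section in roman.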
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # mdoc knows no anchors so we can drop those, but classes must be rejected.
        if 'class' in token.attrs:
            return super().attr_span_begin(token, tokens, i)
        return ""
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise RuntimeError("md token not supported in manpages", token)
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise RuntimeError("md token not supported in manpages", token)
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # max item head width for a number, a dot, and one leading and one trailing space
        width = 3 + len(str(cast(int, token.meta['end'])))
        self._list_stack.append(
            List(width = width,
                 next_idx = cast(int, token.attrs.get('start', 1)),
                 compact = bool(token.meta['compact'])))
        return self._maybe_parbreak()
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.pop()
        return ""