from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import cast, Iterable, Optional

import re

from markdown_it.token import Token

from .md import Renderer

# roff(7) says:
#
# > roff documents may contain only graphable 7-bit ASCII characters, the space character,
# > and, in certain circumstances, the tab character. The backslash character ‘\’ indicates
# > the start of an escape sequence […]
#
# mandoc_char(7) says about the `'~^ characters:
#
# > In prose, this automatic substitution is often desirable; but when these characters have
# > to be displayed as plain ASCII characters, for example in source code samples, they require
# > escaping to render as follows:
#
# since we don't want these to be touched anywhere (because markdown will do all substitutions
# we want to have) we'll escape those as well. we also escape " (macro metacharacter), - (might
# turn into a typographic hyphen), and . (roff request marker at SOL, changes spacing semantics
# at EOL). groff additionally does not allow unicode escapes for codepoints below U+0080, so
# those need "proper" roff escapes/replacements instead.
_roff_unicode = re.compile(r'''[^\n !#$%&()*+,\-./0-9:;<=>?@A-Z[\\\]_a-z{|}]''', re.ASCII)
_roff_escapes = {
    ord('"'): "\\(dq",
    ord("'"): "\\(aq",
    ord('-'): "\\-",
    ord('.'): "\\&.",
    ord('\\'): "\\e",
    ord('^'): "\\(ha",
    ord('`'): "\\(ga",
    ord('~'): "\\(ti",
}
def man_escape(s: str) -> str:
    s = s.translate(_roff_escapes)
    return _roff_unicode.sub(lambda m: f"\\[u{ord(m[0]):04X}]", s)
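# a rough illustration of the escaping above (illustrative only, not a doctest):
#   man_escape("a-b.c é")  ==  r"a\-b\&.c \[u00E9]"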

# remove leading and trailing spaces from links and condense multiple consecutive spaces
# into a single space for presentation parity with html. this is currently easiest with
# regex postprocessing and some marker characters. since we don't want to drop spaces
# from code blocks we will have to specially protect *inline* code (luckily not block code)
# so normalization can turn the spaces inside it into regular spaces again.
_normalize_space_re = re.compile(r'''\u0000 < *| *>\u0000 |(?<= ) +''')
def _normalize_space(s: str) -> str:
    return _normalize_space_re.sub("", s).replace("\0p", " ")

def _protect_spaces(s: str) -> str:
    return s.replace(" ", "\0p")
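# a rough illustration of the round trip (illustrative only, not a doctest):
#   _normalize_space("a  b")                   == "a b"   # ordinary space runs are condensed
#   _normalize_space(_protect_spaces("a  b"))  == "a  b"  # protected inline-code spaces survive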

@dataclass(kw_only=True)
class List:
    width: int
    next_idx: Optional[int] = None
    compact: bool
    first_item_seen: bool = False

# this renderer assumes that it produces a set of lines as output, and that those lines will
# be pasted as-is into a larger output. no prefixing or suffixing is allowed for correctness.
#
# NOTE that we output exclusively physical markup. this is because we have to use the older
# man(7) format instead of the newer mdoc(7) format due to limitations in groff: while
# using mdoc in groff works fine it is not a native format and thus very slow to render on
# manpages as large as configuration.nix.5. mandoc(1) renders both really quickly, but with
# groff being our predominant manpage viewer we have to optimize for groff instead.
#
# while we do use only physical markup (adjusting indentation with .RS and .RE, adding
# vertical spacing with .sp, \f[BIRP] escapes for bold/italic/roman/$previous font, \h for
# horizontal motion in a line) we do attempt to copy the style of mdoc(7) semantic requests
# as appropriate for each markup element.
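#
# as a rough sketch (illustrative only, details may differ): a paragraph such as
# "**must** be set", when it is not the first block in its container, comes out as
#
#   .sp
#   \fBmust\fR be set
#
# where \fR restores the enclosing font from the font stack instead of using a semantic macro.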
class ManpageRenderer(Renderer):
    # whether to emit mdoc .Ql equivalents for inline code or just the contents. this is
    # mainly used by the options manpage converter to not emit extra quotes in defaults
    # and examples where it's already clear from context that the following text is code.
    inline_code_is_quoted: bool = True
    link_footnotes: Optional[list[str]] = None

    _href_targets: dict[str, str]

    _link_stack: list[str]
    _do_parbreak_stack: list[bool]
    _list_stack: list[List]
    _font_stack: list[str]

    def __init__(self, manpage_urls: Mapping[str, str], href_targets: dict[str, str]):
        super().__init__(manpage_urls)
        self._href_targets = href_targets
        self._link_stack = []
        self._do_parbreak_stack = []
        self._list_stack = []
        self._font_stack = []

    def _join_block(self, ls: Iterable[str]) -> str:
        return "\n".join([ l for l in ls if len(l) ])
    def _join_inline(self, ls: Iterable[str]) -> str:
        return _normalize_space(super()._join_inline(ls))

    def _enter_block(self) -> None:
        self._do_parbreak_stack.append(False)
    def _leave_block(self) -> None:
        self._do_parbreak_stack.pop()
        self._do_parbreak_stack[-1] = True
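    # note on the parbreak machinery: _enter_block pushes False, so the first block inside a
    # container emits no .sp while later siblings do, and _leave_block marks the parent so the
    # next sibling after the container gets a paragraph break again.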
    def _maybe_parbreak(self, suffix: str = "") -> str:
        result = f".sp{suffix}" if self._do_parbreak_stack[-1] else ""
        self._do_parbreak_stack[-1] = True
        return result

    def _admonition_open(self, kind: str) -> str:
        self._enter_block()
        return (
            '.sp\n'
            '.RS 4\n'
            f'\\fB{kind}\\fP\n'
            '.br'
        )
    def _admonition_close(self) -> str:
        self._leave_block()
        return ".RE"

    def render(self, tokens: Sequence[Token]) -> str:
        self._do_parbreak_stack = [ False ]
        self._font_stack = [ "\\fR" ]
        return super().render(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return man_escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._maybe_parbreak()
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".br"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return " "
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        s = _protect_spaces(man_escape(token.content))
        return f"\\fR\\(oq{s}\\(cq\\fP" if self.inline_code_is_quoted else s
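    # rough illustration (not a doctest): with inline_code_is_quoted left at True, the inline
    # code `foo bar` renders (after space normalization) as  \fR\(oqfoo bar\(cq\fP , i.e.
    # single-quoted roman text roughly equivalent to mdoc's .Ql.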
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self.fence(token, tokens, i)
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        href = cast(str, token.attrs['href'])
        self._link_stack.append(href)
        text = ""
        if tokens[i + 1].type == 'link_close' and href in self._href_targets:
            # TODO error or warning if the target can't be resolved
            text = self._href_targets[href]
        self._font_stack.append("\\fB")
        return f"\\fB{text}\0 <"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        href = self._link_stack.pop()
        text = ""
        if self.link_footnotes is not None:
            try:
                idx = self.link_footnotes.index(href) + 1
            except ValueError:
                self.link_footnotes.append(href)
                idx = len(self.link_footnotes)
            text = "\\fR" + man_escape(f"[{idx}]")
        self._font_stack.pop()
        return f">\0 {text}{self._font_stack[-1]}"
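    # rough illustration (assumed input, not a doctest): with link_footnotes set, a link like
    # [foo](https://example.org) renders (after space normalization) roughly as
    #   \fBfoo\fR[1]\fR
    # and the URL is appended to link_footnotes so the caller can emit it in a footnote list.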
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._enter_block()
        lst = self._list_stack[-1]
        maybe_space = '' if lst.compact or not lst.first_item_seen else '.sp\n'
        lst.first_item_seen = True
        head = "•"
        if lst.next_idx is not None:
            head = f"{lst.next_idx}."
            lst.next_idx += 1
        return (
            f'{maybe_space}'
            f'.RS {lst.width}\n'
            f"\\h'-{len(head) + 1}'\\fB{man_escape(head)}\\fP\\h'1'\\c"
        )
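    # rough illustration (not a doctest): the first item of a compact bullet list opens as
    #   .RS 4
    #   \h'-2'\fB\[u2022]\fP\h'1'\c
    # i.e. the bullet is drawn inside the indentation created by .RS and \c keeps the item
    # body on the same output line.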
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.append(List(width=4, compact=bool(token.meta['compact'])))
        return self._maybe_parbreak()
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.pop()
        return ""
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.append("\\fI")
        return "\\fI"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.pop()
        return self._font_stack[-1]
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.append("\\fB")
        return "\\fB"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.pop()
        return self._font_stack[-1]
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        s = man_escape(token.content).rstrip('\n')
        return (
            '.sp\n'
            '.RS 4\n'
            '.nf\n'
            f'{s}\n'
            '.fi\n'
            '.RE'
        )
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        maybe_par = self._maybe_parbreak("\n")
        self._enter_block()
        return (
            f"{maybe_par}"
            ".RS 4\n"
            "\\h'-3'\\fI\\(lq\\(rq\\fP\\h'1'\\c"
        )
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Note")
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Caution")
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Important")
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Tip")
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Warning")
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".RS 4"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".RE"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".PP"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._enter_block()
        return ".RS 4"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] in [ 'command', 'env', 'option' ]:
            return f'\\fB{man_escape(token.content)}\\fP'
        elif token.meta['name'] in [ 'file', 'var' ]:
            return f'\\fI{man_escape(token.content)}\\fP'
        elif token.meta['name'] == 'manpage':
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            return f'\\fB{man_escape(page)}\\fP\\fR({man_escape(section)})\\fP'
        else:
            raise NotImplementedError("md node not supported yet", token)
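    # rough illustration (not a doctest): {manpage}`nix.conf(5)` becomes
    #   \fBnix\&.conf\fP\fR(5)\fP
    # i.e. a bold page name followed by the section in roman.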
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # mdoc knows no anchors so we can drop those, but classes must be rejected.
        if 'class' in token.attrs:
            return super().attr_span_begin(token, tokens, i)
        return ""
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise RuntimeError("md token not supported in manpages", token)
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise RuntimeError("md token not supported in manpages", token)
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # max item head width for a number, a dot, and one leading and one trailing space
        width = 3 + len(str(cast(int, token.meta['end'])))
        self._list_stack.append(
            List(width = width,
                 next_idx = cast(int, token.attrs.get('start', 1)),
                 compact = bool(token.meta['compact'])))
        return self._maybe_parbreak()
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.pop()
        return ""