from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import cast, Iterable, Optional

import re

from markdown_it.token import Token

from .md import Renderer

# roff(7) says:
#
# > roff documents may contain only graphable 7-bit ASCII characters, the space character,
# > and, in certain circumstances, the tab character. The backslash character ‘\’ indicates
# > the start of an escape sequence […]
#
# mandoc_char(7) says about the `'~^ characters:
#
# > In prose, this automatic substitution is often desirable; but when these characters have
# > to be displayed as plain ASCII characters, for example in source code samples, they require
# > escaping to render as follows:
#
# since we don't want these to be touched anywhere (because markdown will do all substitutions
# we want to have) we'll escape those as well. we also escape " (macro metacharacter), - (might
# turn into a typographic hyphen), and . (roff request marker at SOL, changes spacing semantics
# at EOL). groff additionally does not allow unicode escapes for codepoints below U+0080, so
# those need "proper" roff escapes/replacements instead.
_roff_unicode = re.compile(r'''[^\n !#$%&()*+,\-./0-9:;<=>?@A-Z[\\\]_a-z{|}]''', re.ASCII)
# translation table applied first: ASCII characters that need a named roff escape.
_roff_escapes = str.maketrans({
    '"': "\\(dq",
    "'": "\\(aq",
    '-': "\\-",
    '.': "\\&.",
    '\\': "\\e",
    '^': "\\(ha",
    '`': "\\(ga",
    '~': "\\(ti",
})
def man_escape(s: str) -> str:
    """Escape *s* for literal inclusion in roff source.

    First replaces the problematic ASCII characters via the named escapes in
    ``_roff_escapes``, then rewrites every remaining character outside the safe
    set as a ``\\[uXXXX]`` unicode escape.
    """
    escaped = s.translate(_roff_escapes)
    return _roff_unicode.sub(lambda match: f"\\[u{ord(match[0]):04X}]", escaped)

# remove leading and trailing spaces from links and condense multiple consecutive spaces
# into a single space for presentation parity with html. this is currently easiest with
# regex postprocessing and some marker characters.
# since we don't want to drop spaces
# from code blocks we will have to specially protect *inline* code (luckily not block code)
# so normalization can turn the spaces inside it into regular spaces again.
_normalize_space_re = re.compile(r'''\u0000 < *| *>\u0000 |(?<= ) +''')
def _normalize_space(s: str) -> str:
    # drop the NUL-delimited link markers (together with the spaces they bracket),
    # collapse runs of spaces, then turn protected spaces back into real spaces.
    return _normalize_space_re.sub("", s).replace("\0p", " ")

def _protect_spaces(s: str) -> str:
    # replace spaces with a marker character so _normalize_space leaves them alone.
    return s.replace(" ", "\0p")

@dataclass(kw_only=True)
class List:
    # rendering state for one level of list nesting.
    width: int  # .RS indent used for items of this list
    next_idx: Optional[int] = None  # next ordinal of an ordered list, None for bullet lists
    compact: bool  # whether to omit vertical space between items
    first_item_seen: bool = False

# this renderer assumes that it produces a set of lines as output, and that those lines will
# be pasted as-is into a larger output. no prefixing or suffixing is allowed for correctness.
#
# NOTE that we output exclusively physical markup. this is because we have to use the older
# mandoc(7) format instead of the newer mdoc(7) format due to limitations in groff: while
# using mdoc in groff works fine it is not a native format and thus very slow to render on
# manpages as large as configuration.nix.5. mandoc(1) renders both really quickly, but with
# groff being our predominant manpage viewer we have to optimize for groff instead.
#
# while we do use only physical markup (adjusting indentation with .RS and .RE, adding
# vertical spacing with .sp, \f[BIRP] escapes for bold/italic/roman/$previous font, \h for
# horizontal motion in a line) we do attempt to copy the style of mdoc(7) semantic requests
# as appropriate for each markup element.
class ManpageRenderer(Renderer):
    # whether to emit mdoc .Ql equivalents for inline code or just the contents. this is
    # mainly used by the options manpage converter to not emit extra quotes in defaults
    # and examples where it's already clear from context that the following text is code.
    inline_code_is_quoted: bool = True
    # when set, links are collected here and rendered as numbered footnote references.
    link_footnotes: Optional[list[str]] = None

    # maps link hrefs to replacement link texts for links with empty bodies.
    _href_targets: dict[str, str]

    _link_stack: list[str]
    _do_parbreak_stack: list[bool]
    _list_stack: list[List]
    _font_stack: list[str]

    def __init__(self, manpage_urls: Mapping[str, str], href_targets: dict[str, str]):
        super().__init__(manpage_urls)
        self._href_targets = href_targets
        self._link_stack = []
        self._do_parbreak_stack = []
        self._list_stack = []
        self._font_stack = []

    def _join_block(self, ls: Iterable[str]) -> str:
        # empty lines would change the semantics of the surrounding roff requests.
        return "\n".join([ l for l in ls if len(l) ])
    def _join_inline(self, ls: Iterable[str]) -> str:
        return _normalize_space(super()._join_inline(ls))

    def _enter_block(self) -> None:
        self._do_parbreak_stack.append(False)
    def _leave_block(self) -> None:
        self._do_parbreak_stack.pop()
        # a block was closed, so the enclosing scope needs a break before its next paragraph.
        self._do_parbreak_stack[-1] = True
    def _maybe_parbreak(self, suffix: str = "") -> str:
        """Emit ``.sp`` if a paragraph break is pending at the current nesting level."""
        result = f".sp{suffix}" if self._do_parbreak_stack[-1] else ""
        self._do_parbreak_stack[-1] = True
        return result

    def _admonition_open(self, kind: str) -> str:
        # admonitions render as an indented block headed by a bold keyword.
        self._enter_block()
        return (
            '.sp\n'
            '.RS 4\n'
            f'\\fB{kind}\\fP\n'
            '.br'
        )
    def _admonition_close(self) -> str:
        self._leave_block()
        return ".RE"

    def render(self, tokens: Sequence[Token]) -> str:
        # reset per-document state so the renderer can be reused for multiple documents.
        self._do_parbreak_stack = [ False ]
        self._font_stack = [ "\\fR" ]
        return super().render(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return man_escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._maybe_parbreak()
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".br"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return " "
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # protect the spaces so _normalize_space doesn't condense them.
        s = _protect_spaces(man_escape(token.content))
        return f"\\fR\\(oq{s}\\(cq\\fP" if self.inline_code_is_quoted else s
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self.fence(token, tokens, i)
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        href = cast(str, token.attrs['href'])
        self._link_stack.append(href)
        text = ""
        if tokens[i + 1].type == 'link_close' and href in self._href_targets:
            # TODO error or warning if the target can't be resolved
            text = self._href_targets[href]
        self._font_stack.append("\\fB")
        # the NUL markers let _normalize_space trim spaces around the link body.
        return f"\\fB{text}\0 <"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        href = self._link_stack.pop()
        text = ""
        if self.link_footnotes is not None:
            # reuse an existing footnote index for repeated hrefs.
            try:
                idx = self.link_footnotes.index(href) + 1
            except ValueError:
                self.link_footnotes.append(href)
                idx = len(self.link_footnotes)
            text = "\\fR" + man_escape(f"[{idx}]")
        self._font_stack.pop()
        return f">\0 {text}{self._font_stack[-1]}"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._enter_block()
        lst = self._list_stack[-1]
        maybe_space = '' if lst.compact or not lst.first_item_seen else '.sp\n'
        lst.first_item_seen = True
        # fix: the head was previously the empty string, which rendered bullet list
        # items with no marker at all (and an off-by-one \h'-1' motion). unordered
        # lists get a bullet (escaped to \[u2022]); ordered lists get "N." instead.
        head = "•"
        if lst.next_idx is not None:
            head = f"{lst.next_idx}."
            lst.next_idx += 1
        return (
            f'{maybe_space}'
            f'.RS {lst.width}\n'
            f"\\h'-{len(head) + 1}'\\fB{man_escape(head)}\\fP\\h'1'\\c"
        )
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.append(List(width=4, compact=bool(token.meta['compact'])))
        return self._maybe_parbreak()
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.pop()
        return ""
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.append("\\fI")
        return "\\fI"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.pop()
        # restore whatever font was active around the emphasis span.
        return self._font_stack[-1]
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.append("\\fB")
        return "\\fB"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._font_stack.pop()
        return self._font_stack[-1]
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        s = man_escape(token.content).rstrip('\n')
        return (
            '.sp\n'
            '.RS 4\n'
            '.nf\n'
            f'{s}\n'
            '.fi\n'
            '.RE'
        )
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        maybe_par = self._maybe_parbreak("\n")
        self._enter_block()
        return (
            f"{maybe_par}"
            ".RS 4\n"
            "\\h'-3'\\fI\\(lq\\(rq\\fP\\h'1'\\c"
        )
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Note")
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Caution")
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Important")
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Tip")
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_open("Warning")
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._admonition_close()
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".RS 4"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".RE"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ".PP"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._enter_block()
        return ".RS 4"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._leave_block()
        return ".RE"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] in [ 'command', 'env', 'option' ]:
            return f'\\fB{man_escape(token.content)}\\fP'
        elif token.meta['name'] in [ 'file', 'var' ]:
            return f'\\fI{man_escape(token.content)}\\fP'
        elif token.meta['name'] == 'manpage':
            # split "page(section)" into its parts, e.g. "man(1)" -> "man", "1".
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            return f'\\fB{man_escape(page)}\\fP\\fR({man_escape(section)})\\fP'
        else:
            raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # mdoc knows no anchors so we can drop those, but classes must be rejected.
        if 'class' in token.attrs:
            return super().attr_span_begin(token, tokens, i)
        return ""
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return ""
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise RuntimeError("md token not supported in manpages", token)
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise RuntimeError("md token not supported in manpages", token)
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # max item head width for a number, a dot, and one leading space and one trailing space
        width = 3 + len(str(cast(int, token.meta['end'])))
        self._list_stack.append(
            List(width = width,
                 next_idx = cast(int, token.attrs.get('start', 1)),
                 compact = bool(token.meta['compact'])))
        return self._maybe_parbreak()
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._list_stack.pop()
        return ""