from collections.abc import Mapping, Sequence
from typing import cast, Optional, NamedTuple

from markdown_it.token import Token
from xml.sax.saxutils import escape, quoteattr

from .md import Renderer

# xml ids may not contain these characters; replace them with underscores.
_xml_id_translate_table = {
    ord('*'): ord('_'),
    ord('<'): ord('_'),
    ord(' '): ord('_'),
    ord('>'): ord('_'),
    ord('['): ord('_'),
    ord(']'): ord('_'),
    ord(':'): ord('_'),
    ord('"'): ord('_'),
}
def make_xml_id(s: str) -> str:
    return s.translate(_xml_id_translate_table)

class Deflist:
    has_dd = False

class Heading(NamedTuple):
    container_tag: str
    level: int
    # special handling for <part> titles: whether the partintro was already closed from elsewhere
    # or still needs closing.
    partintro_closed: bool = False

class DocBookRenderer(Renderer):
    _link_tags: list[str]
    _deflists: list[Deflist]
    _headings: list[Heading]
    _attrspans: list[str]

    def __init__(self, manpage_urls: Mapping[str, str]):
        super().__init__(manpage_urls)
        self._link_tags = []
        self._deflists = []
        self._headings = []
        self._attrspans = []

    def render(self, tokens: Sequence[Token]) -> str:
        result = super().render(tokens)
        result += self._close_headings(None)
        return result
    def renderInline(self, tokens: Sequence[Token]) -> str:
        # HACK to support docbook links and xrefs. link handling is only necessary because the
        # docbook manpage stylesheet converts - in urls to a mathematical minus, which may be
        # somewhat incorrect.
        for i, token in enumerate(tokens):
            if token.type != 'link_open':
                continue
            token.tag = 'link'
            # turn [](#foo) into xrefs
            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close':  # type: ignore[index]
                token.tag = "xref"
            # turn <x> into links without contents
            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
                tokens[i + 1].content = ''

        return super().renderInline(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para>"
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</para>"
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<literallayout>\n</literallayout>"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # should check options.breaks() and emit a hard break if set
        return "\n"
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<literal>{escape(token.content)}</literal>"
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>{escape(token.content)}</programlisting>"
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._link_tags.append(token.tag)
        href = cast(str, token.attrs['href'])
        (attr, start) = ('linkend', 1) if href[0] == '#' else ('xlink:href', 0)
        return f"<{token.tag} {attr}={quoteattr(href[start:])}>"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"</{self._link_tags.pop()}>"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<listitem>"
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem>\n"
    # HACK: open and close a para around lists to keep the docbook change (diff) small. remove soon.
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<para><itemizedlist{spacing}>\n"
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n</itemizedlist></para>"
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis>"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis role=\"strong\">"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><blockquote>"
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</blockquote></para>"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><note>"
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</note></para>"
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><caution>"
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</caution></para>"
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><important>"
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</important></para>"
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><tip>"
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</tip></para>"
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><warning>"
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</warning></para>"
    # markdown-it emits tokens based on the html syntax tree, but docbook is
    # slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
    # docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}</variablelist>.
    # we have to reject multiple definitions for the same term for the time being.
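    # illustration (not from the original source): assuming markdown-it's deflist extension
    # supplies the dl/dt/dd tokens, a definition list such as
    #
    #   term
    #   :   definition
    #
    # is emitted (modulo whitespace) as
    #
    #   <para><variablelist>
    #     <varlistentry>
    #       <term>term</term>
    #       <listitem><para>definition</para></listitem>
    #     </varlistentry>
    #   </variablelist></para>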
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.append(Deflist())
        return "<para><variablelist>"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.pop()
        return "</variablelist></para>"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists[-1].has_dd = False
        return "<varlistentry><term>"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</term>"
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if self._deflists[-1].has_dd:
            raise Exception("multiple definitions per term not supported")
        self._deflists[-1].has_dd = True
        return "<listitem>"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem></varlistentry>"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] == 'command':
            return f"<command>{escape(token.content)}</command>"
        if token.meta['name'] == 'file':
            return f"<filename>{escape(token.content)}</filename>"
        if token.meta['name'] == 'var':
            return f"<varname>{escape(token.content)}</varname>"
        if token.meta['name'] == 'env':
            return f"<envar>{escape(token.content)}</envar>"
        if token.meta['name'] == 'option':
            return f"<option>{escape(token.content)}</option>"
        if token.meta['name'] == 'manpage':
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            man = f"{page}({section})"
            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
            vol = f"<manvolnum>{escape(section)}</manvolnum>"
            ref = f"<citerefentry>{title}{vol}</citerefentry>"
            if man in self._manpage_urls:
                return f"<link xlink:href={quoteattr(self._manpage_urls[man])}>{ref}</link>"
            else:
                return ref
        raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # we currently support *only* inline anchors and the special .keycap class to produce
        # <keycap> docbook elements.
        (id_part, class_part) = ("", "")
        if s := token.attrs.get('id'):
            id_part = f'<anchor xml:id={quoteattr(cast(str, s))} />'
        if s := token.attrs.get('class'):
            if s == 'keycap':
                class_part = "<keycap>"
                self._attrspans.append("</keycap>")
            else:
                return super().attr_span_begin(token, tokens, i)
        else:
            self._attrspans.append("")
        return id_part + class_part
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._attrspans.pop()
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<orderedlist{start}{spacing}>"
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</orderedlist>"
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        hlevel = int(token.tag[1:])
        result = self._close_headings(hlevel)
        (tag, attrs) = self._heading_tag(token, tokens, i)
        self._headings.append(Heading(tag, hlevel))
        attrs_str = "".join([ f" {k}={quoteattr(v)}" for k, v in attrs.items() ])
        return result + f'<{tag}{attrs_str}>\n<title>'
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        heading = self._headings[-1]
        result = '</title>'
        if heading.container_tag == 'part':
            # generate the same ids as were previously assigned manually. if this collides we
            # rely on outside schema validation to catch it!
            maybe_id = ""
            assert tokens[i - 2].type == 'heading_open'
            if id := cast(str, tokens[i - 2].attrs.get('id', "")):
                maybe_id = " xml:id=" + quoteattr(id + "-intro")
            result += f"<partintro{maybe_id}>"
        return result
    def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        id_part = ""
        if id := cast(str, token.attrs.get('id', '')):
            id_part = f' xml:id={quoteattr(id)}'
        return f"<example{id_part}>"
    def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</example>"
    def example_title_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<title>"
    def example_title_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</title>"

    def _close_headings(self, level: Optional[int]) -> str:
        # we rely on markdown-it producing h{1..6} tags in token.tag for this to work
        result: list[str] = []
        while self._headings:
            if level is None or self._headings[-1].level >= level:
                heading = self._headings.pop()
                if heading.container_tag == 'part' and not heading.partintro_closed:
                    result.append("</partintro>")
                result.append(f"</{heading.container_tag}>")
            else:
                break
        return "\n".join(result)

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        attrs: dict[str, str] = {}
        if id := token.attrs.get('id'):
            attrs['xml:id'] = cast(str, id)
        return ("section", attrs)
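
# usage sketch (illustrative, not part of the module): the renderer is driven by
# markdown-it-py tokens. assuming the base Renderer from .md dispatches token types
# to the methods above, a conversion could look roughly like this; the plain
# MarkdownIt() below is an assumption for brevity, the real pipeline enables further
# extensions (definition lists, attrs, MyST roles, admonitions) before tokens reach
# this class.
#
#     from markdown_it import MarkdownIt
#
#     tokens = MarkdownIt().parse("Fix it with *care*.")
#     out = DocBookRenderer(manpage_urls={}).render(tokens)
#     # out is roughly "<para>Fix it with <emphasis>care</emphasis>.</para>"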