from collections.abc import Mapping, Sequence
from typing import cast, Optional, NamedTuple

from markdown_it.token import Token
from xml.sax.saxutils import escape, quoteattr

from .md import Renderer

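# characters that are not valid in xml ids are mapped to underscores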
_xml_id_translate_table = {
    ord('*'): ord('_'),
    ord('<'): ord('_'),
    ord(' '): ord('_'),
    ord('>'): ord('_'),
    ord('['): ord('_'),
    ord(']'): ord('_'),
    ord(':'): ord('_'),
    ord('"'): ord('_'),
}
def make_xml_id(s: str) -> str:
    return s.translate(_xml_id_translate_table)

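# per-definition-list state: a varlistentry can hold only one listitem, so we
# track whether the current term already has a definition attached.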
class Deflist:
    has_dd = False

class Heading(NamedTuple):
    container_tag: str
    level: int
    # special handling for <part> titles: whether partintro was already closed from elsewhere
    # or still needs closing.
    partintro_closed: bool = False

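# renders a markdown-it token stream to DocBook XML.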
class DocBookRenderer(Renderer):
    _link_tags: list[str]
    _deflists: list[Deflist]
    _headings: list[Heading]
    _attrspans: list[str]

    def __init__(self, manpage_urls: Mapping[str, str]):
        super().__init__(manpage_urls)
        self._link_tags = []
        self._deflists = []
        self._headings = []
        self._attrspans = []

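    # close any heading containers that are still open once all tokens have been rendered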
    def render(self, tokens: Sequence[Token]) -> str:
        result = super().render(tokens)
        result += self._close_headings(None)
        return result
    def renderInline(self, tokens: Sequence[Token]) -> str:
        # HACK to support docbook links and xrefs. link handling is only necessary because the docbook
        # manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect.
        for i, token in enumerate(tokens):
            if token.type != 'link_open':
                continue
            token.tag = 'link'
            # turn [](#foo) into xrefs
            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close': # type: ignore[index]
                token.tag = "xref"
            # turn <x> into links without contents
            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
                tokens[i + 1].content = ''

        return super().renderInline(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para>"
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</para>"
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<literallayout>\n</literallayout>"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # should check options.breaks() and emit hard break if so
        return "\n"
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<literal>{escape(token.content)}</literal>"
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>{escape(token.content)}</programlisting>"
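    # internal links (#fragment) become linkend references, everything else keeps its url in xlink:href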
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._link_tags.append(token.tag)
        href = cast(str, token.attrs['href'])
        (attr, start) = ('linkend', 1) if href[0] == '#' else ('xlink:href', 0)
        return f"<{token.tag} {attr}={quoteattr(href[start:])}>"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"</{self._link_tags.pop()}>"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<listitem>"
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem>\n"
    # HACK open and close para for docbook change size. remove soon.
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<para><itemizedlist{spacing}>\n"
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n</itemizedlist></para>"
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis>"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis role=\"strong\">"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><blockquote>"
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</blockquote></para>"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><note>"
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</note></para>"
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><caution>"
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</caution></para>"
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><important>"
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</important></para>"
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><tip>"
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</tip></para>"
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><warning>"
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</warning></para>"
    # markdown-it emits tokens based on the html syntax tree, but docbook is
    # slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
    # docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}</variablelist>.
    # we have to reject multiple definitions for the same term for the time being.
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.append(Deflist())
        return "<para><variablelist>"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.pop()
        return "</variablelist></para>"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists[-1].has_dd = False
        return "<varlistentry><term>"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</term>"
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if self._deflists[-1].has_dd:
            raise Exception("multiple definitions per term not supported")
        self._deflists[-1].has_dd = True
        return "<listitem>"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem></varlistentry>"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] == 'command':
            return f"<command>{escape(token.content)}</command>"
        if token.meta['name'] == 'file':
            return f"<filename>{escape(token.content)}</filename>"
        if token.meta['name'] == 'var':
            return f"<varname>{escape(token.content)}</varname>"
        if token.meta['name'] == 'env':
            return f"<envar>{escape(token.content)}</envar>"
        if token.meta['name'] == 'option':
            return f"<option>{escape(token.content)}</option>"
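        # manpage roles are written as `page(section)`; split them so the page name and volume
        # can fill <refentrytitle> and <manvolnum>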
        if token.meta['name'] == 'manpage':
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            man = f"{page}({section})"
            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
            vol = f"<manvolnum>{escape(section)}</manvolnum>"
            ref = f"<citerefentry>{title}{vol}</citerefentry>"
            if man in self._manpage_urls:
                return f"<link xlink:href={quoteattr(self._manpage_urls[man])}>{ref}</link>"
            else:
                return ref
        raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # we currently support *only* inline anchors and the special .keycap class to produce
        # <keycap> docbook elements.
        (id_part, class_part) = ("", "")
        if s := token.attrs.get('id'):
            id_part = f'<anchor xml:id={quoteattr(cast(str, s))} />'
        if s := token.attrs.get('class'):
            if s == 'keycap':
                class_part = "<keycap>"
                self._attrspans.append("</keycap>")
            else:
                return super().attr_span_begin(token, tokens, i)
        else:
            self._attrspans.append("")
        return id_part + class_part
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._attrspans.pop()
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<orderedlist{start}{spacing}>"
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</orderedlist>"
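    # headings nest as docbook containers, so opening a heading first closes every container
    # of equal or deeper level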
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        hlevel = int(token.tag[1:])
        result = self._close_headings(hlevel)
        (tag, attrs) = self._heading_tag(token, tokens, i)
        self._headings.append(Heading(tag, hlevel))
        attrs_str = "".join([ f" {k}={quoteattr(v)}" for k, v in attrs.items() ])
        return result + f'<{tag}{attrs_str}>\n<title>'
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        heading = self._headings[-1]
        result = '</title>'
        if heading.container_tag == 'part':
            # generate the same ids as were previously assigned manually. if this collides we
            # rely on outside schema validation to catch it!
            maybe_id = ""
            assert tokens[i - 2].type == 'heading_open'
            if id := cast(str, tokens[i - 2].attrs.get('id', "")):
                maybe_id = " xml:id=" + quoteattr(id + "-intro")
            result += f"<partintro{maybe_id}>"
        return result
    def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # emit the example id as xml:id if one was set on the token
        id_part = ""
        if id := cast(str, token.attrs.get('id', '')):
            id_part = f' xml:id={quoteattr(id)}'
        return f"<example{id_part}>"
    def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</example>"
    def example_title_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<title>"
    def example_title_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</title>"

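    # close all open heading containers with a level >= `level` (all of them when `level` is None)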
    def _close_headings(self, level: Optional[int]) -> str:
        # we rely on markdown-it producing h{1..6} tags in token.tag for this to work
        result = []
        while len(self._headings):
            if level is None or self._headings[-1].level >= level:
                heading = self._headings.pop()
                if heading.container_tag == 'part' and not heading.partintro_closed:
                    result.append("</partintro>")
                result.append(f"</{heading.container_tag}>")
            else:
                break
        return "\n".join(result)

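    # container element and attributes for a heading. the default is a plain <section>;
    # renderers that need other containers (e.g. part) can override this.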
    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        attrs = {}
        if id := token.attrs.get('id'):
            attrs['xml:id'] = cast(str, id)
        return ("section", attrs)