commit da95ab11b41eec733dca5212ce16d16a15dc55d4 · pyrox.dev/nixpkgs

+1 -1

doc/Makefile

···

       3
       3
        
       PANDOC ?= pandoc

     

       4
       4
        
       

     

       5
       5
        
       pandoc_media_dir = media

     

       6
       6
       -
       # NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh).

     

       6
       6
       +
       # NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh) and conversion script (/maintainers/scripts/db-to-md.sh).

     

       7
       7
        
       # TODO: Remove raw-attribute when we can get rid of DocBook altogether.

     

       8
       8
        
       pandoc_commonmark_enabled_extensions = +attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute

     

       9
       9
        
       # Not needed:

+88

maintainers/scripts/db-to-md.sh

···

       1
       1
       +
       #! /usr/bin/env nix-shell
       #! nix-shell -I nixpkgs=. -i bash -p pandoc libxml2

       # This script is temporarily needed while we transition the manual to
       # CommonMark. It converts DocBook files into our CommonMark flavour.
       #
       # Usage: db-to-md.sh [--debug] FILE...
       #
       # Every FILE is converted to FILE.chapter.md or FILE.section.md (chosen by
       # its root element) with any .xml/.chapter.xml/.section.xml suffix removed.
       # With --debug, pandoc's JSON output is written instead (.json extension)
       # and the intermediate files are kept for inspection.
       #
       # xmllint (from libxml2) is required to detect the root element.

       # Stop at the first failing step so that pandoc never runs on the output
       # of a failed preprocessing stage.
       set -eo pipefail

       debug=
       files=()

       while [ "$#" -gt 0 ]; do
           i="$1"; shift 1
           case "$i" in
             --debug)
               debug=1
               ;;
             *)
               files+=("$i")
               ;;
           esac
       done

       # Diagnostics use >&2 rather than > /dev/stderr: the latter re-opens the
       # file behind stderr, which can truncate a log file stderr is redirected to.
       echo "WARNING: This is an experimental script and might not preserve all formatting." >&2
       echo "Please report any issues you discover." >&2

       outExtension="md"
       if [[ $debug ]]; then
           outExtension="json"
       fi

       DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

       # Intermediate files created by mktemp below.
       temp_files=()

       # Removes the intermediate files on exit. They are kept in debug mode
       # because their paths are printed so that they can be inspected.
       cleanup() {
           if [[ -z $debug && ${#temp_files[@]} -gt 0 ]]; then
               rm -f -- "${temp_files[@]}"
           fi
       }
       trap cleanup EXIT

       # NOTE: Keep in sync with Nixpkgs manual (/doc/Makefile).
       # TODO: Remove raw-attribute when we can get rid of DocBook altogether.
       pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute
       targetLang="commonmark${pandoc_commonmark_enabled_extensions}+smart"
       if [[ $debug ]]; then
           targetLang=json
       fi
       pandoc_flags=(
           # Not needed:
           # - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies)
           # - media extraction (was only required for diagram generator)
           # - myst-reader/roles.lua (only relevant for MyST → DocBook)
           # - link-unix-man-references.lua (links should only be added to display output)
           # - docbook-writer/rst-roles.lua (only relevant for → DocBook)
           # - docbook-writer/labelless-link-is-xref.lua (only relevant for → DocBook)
           "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua"
           "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/myst-writer/roles.lua"
           "--lua-filter=$DIR/doc/unknown-code-language.lua"
           -f docbook
           -t "$targetLang"
           --tab-stop=2
           --wrap=none
       )

       for file in "${files[@]}"; do
           if [[ ! -f "$file" ]]; then
               echo "db-to-md.sh: $file does not exist" >&2
               exit 1
           fi

           # The root element decides which suffix the output file gets.
           rootElement=$(xmllint --xpath 'name(//*)' "$file")

           if [[ $rootElement = chapter ]]; then
               extension=".chapter.$outExtension"
           elif [[ $rootElement = section ]]; then
               extension=".section.$outExtension"
           else
               echo "db-to-md.sh: $file contains an unsupported root element $rootElement" >&2
               exit 1
           fi

           # Strip whichever DocBook suffix the input has before appending ours.
           outFile="${file%".section.xml"}"
           outFile="${outFile%".chapter.xml"}"
           outFile="${outFile%".xml"}$extension"

           # Step 1: escape markup nested inside code elements.
           temp1=$(mktemp)
           temp_files+=("$temp1")
           "$DIR/doc/escape-code-markup.py" "$file" "$temp1"
           if [[ $debug ]]; then
               echo "Converted $file to $temp1" >&2
           fi

           # Step 2: turn xrefs into links with empty labels.
           temp2=$(mktemp)
           temp_files+=("$temp2")
           "$DIR/doc/replace-xrefs-by-empty-links.py" "$temp1" "$temp2"
           if [[ $debug ]]; then
               echo "Converted $temp1 to $temp2" >&2
           fi

           # Step 3: the actual DocBook → CommonMark (or JSON) conversion.
           pandoc "$temp2" -o "$outFile" "${pandoc_flags[@]}"
           echo "Converted $file to $outFile" >&2
       done

+97

maintainers/scripts/doc/escape-code-markup.py

···

       1
       1
       +
       #! /usr/bin/env nix-shell

     

       2
       2
       +
       #! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml

     

       3
       3
       +
       

     

       4
       4
       +
       """

     

       5
       5
       +
       Pandoc will strip any markup within code elements so

     

       6
       6
       +
       let’s escape them so that they can be handled manually.

     

       7
       7
       +
       """

     

       8
       8
       +
       

     

       9
       9
       +
       import lxml.etree as ET

     

       10
       10
       +
       import re

     

       11
       11
       +
       import sys

     

       12
       12
       +
       

     

       13
       13
       +
       def replace_element_by_text(el: ET.Element, text: str) -> None:
           """
           Replace the element ``el`` in its parent by the plain string ``text``.

           The element's tail text is appended to ``text`` so that it stays in the
           document. The combined string is attached to the tail of the preceding
           sibling, or to the parent's text when ``el`` is the first child.
           An element without a parent is left untouched.

           Author: bernulf
           Source: https://stackoverflow.com/a/10520552/160386
           SPDX-License-Identifier: CC-BY-SA-3.0
           """
           replacement = text + (el.tail or "")
           container = el.getparent()
           if container is None:
               return

           sibling = el.getprevious()
           if sibling is None:
               # First child: the string continues the parent's leading text.
               container.text = (container.text or "") + replacement
           else:
               # Otherwise it continues the text following the previous sibling.
               sibling.tail = (sibling.tail or "") + replacement
           container.remove(el)

     

       28
       28
       +
       

     

       29
       29
       +
       # Namespace URI of DocBook elements.
       DOCBOOK_NS = "http://docbook.org/ns/docbook"

       # Local names of the elements that pandoc’s DocBook reader strips markup from.
       # https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs
       code_elements = [
           # Read as CodeBlock
           "literallayout", "screen", "programlisting",
           # Read as Code (inline)
           "classname", "code", "filename", "envar",
           "literal", "computeroutput", "prompt", "parameter",
           "option", "markup", "wordasword", "command",
           "varname", "function", "type", "symbol",
           "constant", "userinput", "systemitem",
       ]

     

       59
       59
       +
       

     

       60
       60
       +
       # A single xmlns or xmlns:prefix attribute, including the whitespace before it.
       XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
       # The first tag of a string, optionally preceded by whitespace.
       ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')

       def remove_xmlns(match: re.Match) -> str:
           """
           Returns the matched text with every xmlns attribute deleted.

           The match is expected to cover an opening tag.
           """
           opening_tag = match.group()
           return XMLNS_REGEX.sub('', opening_tag)

     

       70
       70
       +
       

     

       71
       71
       +
       if __name__ == '__main__':
           # Explicit check instead of assert: asserts are skipped under `python -O`.
           if len(sys.argv) < 3:
               sys.exit("usage: escape-code-markup.py <input> <output>")

           tree = ET.parse(sys.argv[1])
           # XPath condition that is true for any element named like a code element.
           name_predicate = " or ".join(f"local-name()='{el}'" for el in code_elements)

           # Visit every child element of a DocBook code element and turn it into
           # literal text inside its parent.
           for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"):
               # with_tail=False: lxml serializes the tail text by default, and
               # replace_element_by_text appends the tail itself. Without this
               # the text following the element would end up in the output twice.
               text = ET.tostring(markup, encoding=str, with_tail=False)

               # tostring adds xmlns attributes to the element we want to stringify
               # as if it was supposed to be usable standalone.
               # We are just converting it to CDATA so we do not care.
               # Let’s strip the namespace declarations to keep the code clean.
               #
               # Note that this removes even namespaces that were potentially
               # in the original file. Though, that should be very rare –
               # most of the time, we will stringify empty DocBook elements
               # like <xref> or <co> or, at worst, <link> with xlink:href attribute.
               #
               # Also note that the regex expects the root element to be first
               # thing in the string. But that should be fine, the tostring method
               # does not produce XML declaration or doctype by default.
               text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text)

               replace_element_by_text(markup, text)

           tree.write(sys.argv[2])

+32

maintainers/scripts/doc/replace-xrefs-by-empty-links.py

···

       1
       1
       +
       #! /usr/bin/env nix-shell

     

       2
       2
       +
       #! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml

     

       3
       3
       +
       

     

       4
       4
       +
       """

     

       5
       5
       +
       Pandoc will try to resolve xrefs and replace them with regular links.

     

       6
       6
       +
       Let’s replace them with links with empty labels which MyST

     

       7
       7
       +
       and our pandoc filters recognize as cross-references.

     

       8
       8
       +
       """

     

       9
       9
       +
       

     

       10
       10
       +
       import lxml.etree as ET

     

       11
       11
       +
       import sys

     

       12
       12
       +
       

     

       13
       13
       +
       # Namespace of the href attribute set on the generated links.
       XLINK_NS = "http://www.w3.org/1999/xlink"

       # Prefix mapping used when searching for DocBook elements.
       ns = {
           "db": "http://docbook.org/ns/docbook",
       }


       if __name__ == '__main__':
           # Explicit check instead of assert: asserts are skipped under `python -O`.
           if len(sys.argv) < 3:
               sys.exit("usage: replace-xrefs-by-empty-links.py <input> <output>")

           tree = ET.parse(sys.argv[1])
           for xref in tree.findall(".//db:xref", ns):
               target_name = xref.get("linkend")
               if target_name is None:
                   # Nothing to point at; keep the xref instead of linking to "#None".
                   print("replace-xrefs-by-empty-links.py: skipping xref without linkend", file=sys.stderr)
                   continue

               parent = xref.getparent()
               # A link without any content, pointing at the xref's target.
               link = parent.makeelement('link')
               link.set(f"{{{XLINK_NS}}}href", f"#{target_name}")
               # replace() takes the old element's tail out of the tree together
               # with the element, so the text following the xref has to be
               # carried over to the new link explicitly.
               link.tail = xref.tail
               parent.replace(xref, link)

           tree.write(sys.argv[2])

+12

maintainers/scripts/doc/unknown-code-language.lua

···

       1
       1
       +
       --[[

     

       2
       2
       +
       Adds “unknown” class to CodeBlock AST nodes without any classes.

     

       3
       3
       +
       

     

       4
       4
       +
       This will cause Pandoc to use fenced code block, which we prefer.

     

       5
       5
       +
       ]]

     

       6
       6
       +
       

     

       7
       7
       +
       -- Tags a class-less code block with the “unknown” class and returns it.
       -- Blocks that already carry a class produce no return value.
       function CodeBlock(elem)
         local has_classes = #elem.classes ~= 0
         if has_classes then
           return
         end

         elem.classes:insert('unknown')
         return elem
       end