commit 793b1fd8e81b0f29f05f3c646bec43785a90d2f3 · zenfyr.dev/xpost

cross/fragments.py

···

       20
       20
        
       @dataclass(kw_only=True)

     

       21
       21
        
       class MentionFragment(Fragment):

     

       22
       22
        
           uri: str

     

       23
       23
       +
       

     

       24
       24
       +
       

     

       25
       25
       +
       NON_OVERLAPPING: set[type[Fragment]] = {LinkFragment, TagFragment, MentionFragment}

+1 -1

mastodon/input.py

···

       173
       173
        
               )

     

       174
       174
        
       

     

       175
       175
        
               for out in self.outputs:

     

       176
       176
       -
                   self.submitter(lambda: out.accept_repost(status["id"], reposted["id"]))

     

       176
       176
       +
                   self.submitter(lambda: out.accept_repost(status["id"], reblog["id"]))

     

       177
       177
        
       

     

       178
       178
        
           def _on_delete_post(self, status_id: str):

     

       179
       179
        
               post = self._get_post(self.url, self.user_id, status_id)

+25 -112

mastodon/parser.py

···

       1
       1
       -
       from html.parser import HTMLParser

     

       2
       1
        
       from typing import override

     

       3
       2
        
       import cross.fragments as f

     

       3
       3
       +
       from util.html import HTMLToFragmentsParser

     

       4
       4
        
       

     

       5
       5
        
       

     

       6
       6
       -
       class StatusParser(HTMLParser):

     

       6
       6
       +
       class StatusParser(HTMLToFragmentsParser):
           """HTML-to-fragments parser that classifies anchors by their CSS class.

           Anchors whose ``class`` contains ``hashtag`` become ``TagFragment``s,
           those containing ``mention`` become ``MentionFragment``s, and every
           other anchor with an ``href`` becomes a ``LinkFragment``.
           """

           def __init__(self) -> None:
               super().__init__()

           @override
           def handle_a_endtag(self):
               """Record a fragment for the anchor that just closed.

               Pops the pending ``"a"`` entry from ``self._tag_stack`` (raising
               ``KeyError`` when there is none) and records nothing when the
               anchor has no ``href`` or contributed no text.
               """
               end = len(self.text)
               begin, attributes = self._tag_stack.pop("a")

               target = attributes.get('href')
               if not target or end <= begin:
                   return

               classes = attributes.get('class', '')
               covered = self.text[begin:end]

               if classes and 'hashtag' in classes:
                   # The fragment stores the bare tag name, without a leading '#'.
                   name = covered[1:] if covered.startswith('#') else covered
                   self.fragments.append(
                       f.TagFragment(start=begin, end=end, tag=name)
                   )
                   return

               if classes and 'mention' in classes:  # TODO put the full acct in the fragment
                   # Only the visible handle is available here; drop a leading '@'.
                   handle = covered[1:] if covered.startswith('@') else covered
                   self.fragments.append(
                       f.MentionFragment(start=begin, end=end, uri=handle)
                   )
                   return

               # Any other anchor is an ordinary link.
               self.fragments.append(
                   f.LinkFragment(start=begin, end=end, url=target)
               )

+112 -1

misskey/input.py

···

       7
       7
        
       

     

       8
       8
        
       import websockets

     

       9
       9
        
       

     

       10
       10
       +
       from cross.attachments import (

     

       11
       11
       +
           LabelsAttachment,

     

       12
       12
       +
           MediaAttachment,

     

       13
       13
       +
           RemoteUrlAttachment,

     

       14
       14
       +
           SensitiveAttachment,

     

       15
       15
       +
       )

     

       16
       16
       +
       from cross.media import Blob, download_blob

     

       17
       17
       +
       from cross.post import Post

     

       10
       18
        
       from cross.service import InputService

     

       11
       19
        
       from database.connection import DatabasePool

     

       12
       20
        
       from misskey.info import MisskeyService

     

       21
       21
       +
       from util.markdown import MarkdownParser

     

       13
       22
        
       from util.util import normalize_service_url

     

       14
       23
        
       

     

       15
       24
        
       ALLOWED_VISIBILITY = ["public", "home"]

     
···

       53
       62
        
               return self.options.token

     

       54
       63
        
       

     

       55
       64
        
           def _on_note(self, note: dict[str, Any]):
               """Filter an incoming note, convert it to a ``Post`` and fan it out.

               The note is ignored when it was written by another user, uses a
               visibility outside ``self.options.allowed_visibility``, contains a
               poll, is a quote (a renote that also has text), replies to another
               user, or replies to a post ``self._get_post`` cannot find. A renote
               without text is delegated to ``self._on_renote``. If any attached
               file fails to download the whole note is skipped and nothing is
               stored.

               Args:
                   note: Note payload. Reads ``"userId"``, ``"visibility"`` and
                       ``"id"``, plus the optional ``"poll"``, ``"renote"``,
                       ``"reply"``, ``"text"``, ``"files"`` and ``"cw"`` keys.
               """
               if note["userId"] != self.user_id:
                   return

               if note["visibility"] not in self.options.allowed_visibility:
                   return

               if note.get("poll"):
                   self.log.info("Skipping '%s'! Contains a poll..", note["id"])
                   return

               renote: dict[str, Any] | None = note.get("renote")
               if renote:
                   if note.get("text") is not None:
                       self.log.info("Skipping '%s'! Quote..", note["id"])
                       return
                   self._on_renote(note, renote)
                   return

               reply: dict[str, Any] | None = note.get("reply")
               parent = None
               if reply:
                   if reply.get("userId") != self.user_id:
                       self.log.info("Skipping '%s'! Reply to other user..", note["id"])
                       return

                   parent = self._get_post(self.url, self.user_id, reply["id"])
                   if not parent:
                       self.log.info(
                           "Skipping %s, parent %s not found in db", note["id"], reply["id"]
                       )
                       return

               parser = MarkdownParser()  # TODO MFM parser
               text, fragments = parser.parse(note.get("text", ""))
               post = Post(id=note["id"], parent_id=reply["id"] if reply else None, text=text)
               post.fragments.extend(fragments)

               files = note.get("files", [])

               post.attachments.put(RemoteUrlAttachment(url=self.url + "/notes/" + note["id"]))
               if any(a.get("isSensitive", False) for a in files):
                   post.attachments.put(SensitiveAttachment(sensitive=True))
               if note.get("cw"):
                   post.attachments.put(LabelsAttachment(labels=[note["cw"]]))

               blobs: list[Blob] = []
               for media in files:
                   self.log.info("Downloading %s...", media["url"])
                   blob: Blob | None = download_blob(media["url"], media.get("comment", ""))
                   if not blob:
                       self.log.error(
                           "Skipping %s! Failed to download media %s.",
                           note["id"],
                           media["url"],
                       )
                       return
                   blobs.append(blob)

               if blobs:
                   post.attachments.put(MediaAttachment(blobs=blobs))

               row = {
                   "user": self.user_id,
                   "service": self.url,
                   "identifier": note["id"],
               }
               if parent:
                   row["parent"] = parent["id"]
                   # Fall back to the parent's own id when it has no root recorded.
                   row["root"] = parent["root"] or parent["id"]
               self._insert_post(row)

               for out in self.outputs:
                   # Bind ``out`` now: a plain closure would look it up when the job
                   # runs, so deferred jobs would all target the last output.
                   self.submitter(lambda out=out: out.accept_post(post))

     

       146
       146
       +
       

     

       147
       147
       +
           def _on_renote(self, note: dict[str, Any], renote: dict[str, Any]):
               """Store a renote and forward it to every output as a repost.

               The renote is skipped (with an info log) when the renoted post is
               not returned by ``self._get_post``.

               Args:
                   note: The renoting note; only ``"id"`` is read.
                   renote: The note being renoted; only ``"id"`` is read.
               """
               reposted = self._get_post(self.url, self.user_id, renote["id"])
               if not reposted:
                   self.log.info(
                       "Skipping repost '%s' as reposted post '%s' was not found in the db.",
                       note["id"],
                       renote["id"],
                   )
                   return

               self._insert_post(
                   {
                       "user": self.user_id,
                       "service": self.url,
                       "identifier": note["id"],
                       "reposted": reposted["id"],
                   }
               )

               for out in self.outputs:
                   # Bind ``out`` now: a plain closure would look it up when the job
                   # runs, so deferred jobs would all target the last output.
                   self.submitter(lambda out=out: out.accept_repost(note["id"], renote["id"]))

     

       57
       168
        
       

     

       58
       169
        
           def _accept_msg(self, msg: websockets.Data) -> None:

     

       59
       170
        
               data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))

+110

util/html.py

···

       1
       1
       +
       from html.parser import HTMLParser

     

       2
       2
       +
       from typing import override

     

       3
       3
       +
       import cross.fragments as f

     

       4
       4
       +
       

     

       5
       5
       +
       

     

       6
       6
       +
       class HTMLToFragmentsParser(HTMLParser):

     

       7
       7
       +
           def __init__(self) -> None:

     

       8
       8
       +
               super().__init__()

     

       9
       9
       +
               self.text: str = ""

     

       10
       10
       +
               self.fragments: list[f.Fragment] = []

     

       11
       11
       +
       

     

       12
       12
       +
               self._tag_stack: dict[str, tuple[int, dict[str, str | None]]] = {}

     

       13
       13
       +
               self.in_pre: bool = False

     

       14
       14
       +
               self.in_code: bool = False

     

       15
       15
       +
       

     

       16
       16
       +
               self.invisible: bool = False

     

       17
       17
       +
       

     

       18
       18
       +
           def handle_a_endtag(self):

     

       19
       19
       +
               current_end = len(self.text)

     

       20
       20
       +
               start, _attr = self._tag_stack.pop("a")

     

       21
       21
       +
       

     

       22
       22
       +
               href = _attr.get('href')

     

       23
       23
       +
               if href and current_end > start:

     

       24
       24
       +
                   self.fragments.append(

     

       25
       25
       +
                       f.LinkFragment(start=start, end=current_end, url=href)

     

       26
       26
       +
                   )

     

       27
       27
       +
       

     

       28
       28
       +
           @override

     

       29
       29
       +
           def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:

     

       30
       30
       +
               _attr = dict(attrs)

     

       31
       31
       +
       

     

       32
       32
       +
               def append_newline():

     

       33
       33
       +
                   if self.text and not self.text.endswith("\n"):

     

       34
       34
       +
                       self.text += "\n"

     

       35
       35
       +
       

     

       36
       36
       +
               if self.invisible:

     

       37
       37
       +
                   return

     

       38
       38
       +
       

     

       39
       39
       +
               match tag:

     

       40
       40
       +
                   case "p":

     

       41
       41
       +
                       cls = _attr.get('class', '')

     

       42
       42
       +
                       if cls and 'quote-inline' in cls:

     

       43
       43
       +
                           self.invisible = True

     

       44
       44
       +
                   case "a":

     

       45
       45
       +
                       self._tag_stack["a"] = (len(self.text), _attr)

     

       46
       46
       +
                   case "code":

     

       47
       47
       +
                       if not self.in_pre:

     

       48
       48
       +
                           self.text += "`"

     

       49
       49
       +
                           self.in_code = True

     

       50
       50
       +
                   case "pre":

     

       51
       51
       +
                       append_newline()

     

       52
       52
       +
                       self.text += "```\n"

     

       53
       53
       +
                       self.in_pre = True

     

       54
       54
       +
                   case "blockquote":

     

       55
       55
       +
                       append_newline()

     

       56
       56
       +
                       self.text += "> "

     

       57
       57
       +
                   case "strong" | "b":

     

       58
       58
       +
                       self.text += "**"

     

       59
       59
       +
                   case "em" | "i":

     

       60
       60
       +
                       self.text += "*"

     

       61
       61
       +
                   case "del" | "s":

     

       62
       62
       +
                       self.text += "~~"

     

       63
       63
       +
                   case "br":

     

       64
       64
       +
                       self.text += "\n"

     

       65
       65
       +
                   case _:

     

       66
       66
       +
                       if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:

     

       67
       67
       +
                           level = int(tag[1])

     

       68
       68
       +
                           self.text += "\n" + "#" * level + " "

     

       69
       69
       +
       

     

       70
       70
       +
           @override

     

       71
       71
       +
           def handle_endtag(self, tag: str) -> None:

     

       72
       72
       +
               if self.invisible:

     

       73
       73
       +
                   if tag == "p":

     

       74
       74
       +
                       self.invisible = False

     

       75
       75
       +
                   return

     

       76
       76
       +
       

     

       77
       77
       +
               match tag:

     

       78
       78
       +
                   case "a":

     

       79
       79
       +
                       if "a" in self._tag_stack:

     

       80
       80
       +
                           self.handle_a_endtag()

     

       81
       81
       +
                   case "code":

     

       82
       82
       +
                       if not self.in_pre and self.in_code:

     

       83
       83
       +
                           self.text += "`"

     

       84
       84
       +
                           self.in_code = False

     

       85
       85
       +
                   case "pre":

     

       86
       86
       +
                       self.text += "\n```\n"

     

       87
       87
       +
                       self.in_pre = False

     

       88
       88
       +
                   case "blockquote":

     

       89
       89
       +
                       self.text += "\n"

     

       90
       90
       +
                   case "strong" | "b":

     

       91
       91
       +
                       self.text += "**"

     

       92
       92
       +
                   case "em" | "i":

     

       93
       93
       +
                       self.text += "*"

     

       94
       94
       +
                   case "del" | "s":

     

       95
       95
       +
                       self.text += "~~"

     

       96
       96
       +
                   case "p":

     

       97
       97
       +
                       self.text += "\n\n"

     

       98
       98
       +
                   case _:

     

       99
       99
       +
                       if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:

     

       100
       100
       +
                           self.text += '\n'

     

       101
       101
       +
       

     

       102
       102
       +
           @override

     

       103
       103
       +
           def handle_data(self, data: str) -> None:

     

       104
       104
       +
               if not self.invisible:

     

       105
       105
       +
                   self.text += data

     

       106
       106
       +
       

     

       107
       107
       +
           def get_result(self) -> tuple[str, list[f.Fragment]]:

     

       108
       108
       +
               if self.text.endswith('\n\n'):

     

       109
       109
       +
                   return self.text[:-2], self.fragments

     

       110
       110
       +
               return self.text, self.fragments

+143

util/markdown.py

···

       1
       1
       +
       import re

     

       2
       2
       +
       import cross.fragments as f

     

       3
       3
       +
       from util.html import HTMLToFragmentsParser

     

       4
       4
       +
       

     

       5
       5
       +
       # Bare URL: any "<scheme>://" or "mailto:" prefix followed by non-whitespace.
       URL = re.compile(
           r"(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+",
           re.IGNORECASE,
       )

       # Markdown inline link "[label](target)": group 1 is the label, group 2 the target.
       MD_INLINE_LINK = re.compile(
           r"\[([^\]]+)\]"
           r"\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)",
           re.IGNORECASE,
       )

       # Markdown autolink "<target>": group 1 is the target.
       MD_AUTOLINK = re.compile(
           r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>",
           re.IGNORECASE,
       )

       # "#tag" not preceded by a word character: group 1 is the tag name.
       HASHTAG = re.compile(r"(?<!\w)\#([\w]+)")

       # "@user" or "@user@host.tld" not preceded by a word character or '@':
       # group 1 is the user part, group 2 the optional host.
       FEDIVERSE_HANDLE = re.compile(
           r"(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?"
       )

       REGEXES = [
           URL,
           MD_INLINE_LINK,
           MD_AUTOLINK,
           HASHTAG,
           FEDIVERSE_HANDLE,
       ]

     

       17
       17
       +
       

     

       18
       18
       +
       

     

       19
       19
       +
       # TODO autolinks are broken by the html parser

     

       20
       20
       +
       class MarkdownParser:

     

       21
       21
       +
           def parse(self, text: str) -> tuple[str, list[f.Fragment]]:

     

       22
       22
       +
               if not text:

     

       23
       23
       +
                   return "", []

     

       24
       24
       +
       

     

       25
       25
       +
               html_parser = HTMLToFragmentsParser()

     

       26
       26
       +
               html_parser.feed(text)

     

       27
       27
       +
               markdown, fragments = html_parser.get_result()

     

       28
       28
       +
       

     

       29
       29
       +
               index: int = 0

     

       30
       30
       +
               total: int = len(markdown)

     

       31
       31
       +
       

     

       32
       32
       +
               # no match == processed fragments

     

       33
       33
       +
               events: list[tuple[int, int, re.Match[str] | None, str]] = []

     

       34
       34
       +
               events.extend([(fg.start, fg.end, None, "html") for fg in fragments])

     

       35
       35
       +
               while index < total:

     

       36
       36
       +
                   ch = markdown[index]

     

       37
       37
       +
                   rmatch = None

     

       38
       38
       +
                   kind = None

     

       39
       39
       +
       

     

       40
       40
       +
                   if ch == "[":

     

       41
       41
       +
                       rmatch = MD_INLINE_LINK.match(markdown, index)

     

       42
       42
       +
                       kind = "inline_link"

     

       43
       43
       +
                   # elif ch == '<':

     

       44
       44
       +
                   #    rmatch = MD_AUTOLINK.match(markdown, index)

     

       45
       45
       +
                   #    kind = "autolink"

     

       46
       46
       +
                   elif ch == "#":

     

       47
       47
       +
                       rmatch = HASHTAG.match(markdown, index)

     

       48
       48
       +
                       kind = "hashtag"

     

       49
       49
       +
                   elif ch == "@":

     

       50
       50
       +
                       rmatch = FEDIVERSE_HANDLE.match(markdown, index)

     

       51
       51
       +
                       kind = "mention"

     

       52
       52
       +
                   else:

     

       53
       53
       +
                       rmatch = URL.match(markdown, index)

     

       54
       54
       +
                       kind = "url"

     

       55
       55
       +
       

     

       56
       56
       +
                   if rmatch:

     

       57
       57
       +
                       start, end = rmatch.start(), rmatch.end()

     

       58
       58
       +
                       if end == index:

     

       59
       59
       +
                           index += 1

     

       60
       60
       +
                           continue

     

       61
       61
       +
                       events.append((start, end, rmatch, kind))

     

       62
       62
       +
                       index = end

     

       63
       63
       +
                       continue

     

       64
       64
       +
       

     

       65
       65
       +
                   index += 1

     

       66
       66
       +
       

     

       67
       67
       +
               events.sort(key=lambda x: x[0])

     

       68
       68
       +
       

     

       69
       69
       +
               # validate fragment positions

     

       70
       70
       +
               last_end: int = 0

     

       71
       71
       +
               for start, end, _, _ in events:

     

       72
       72
       +
                   if start > end:

     

       73
       73
       +
                       raise Exception(f"Invalid fragment position start={start}, end={end}")

     

       74
       74
       +
                   if last_end > start:

     

       75
       75
       +
                       raise Exception(

     

       76
       76
       +
                           f"Overlapping text fragments at position end={last_end}, start={start}"

     

       77
       77
       +
                       )

     

       78
       78
       +
                   last_end = end

     

       79
       79
       +
       

     

       80
       80
       +
               def update_fragments(start: int, s, offset: int):

     

       81
       81
       +
                   nonlocal fragments

     

       82
       82
       +
       

     

       83
       83
       +
                   for fg in fragments:

     

       84
       84
       +
                       if fg != s and fg.start >= start:

     

       85
       85
       +
                           fg.start += offset

     

       86
       86
       +
                           fg.end += offset

     

       87
       87
       +
       

     

       88
       88
       +
               new_text = ""

     

       89
       89
       +
               last_pos = 0

     

       90
       90
       +
               for start, end, rmatch, event in events:

     

       91
       91
       +
                   if start > last_pos:

     

       92
       92
       +
                       new_text += markdown[last_pos:start]

     

       93
       93
       +
       

     

       94
       94
       +
                   if not rmatch:

     

       95
       95
       +
                       new_text += markdown[start:end]

     

       96
       96
       +
                       last_pos = end

     

       97
       97
       +
                       continue

     

       98
       98
       +
       

     

       99
       99
       +
                   match event:

     

       100
       100
       +
                       case "inline_link":

     

       101
       101
       +
                           label = rmatch.group(1)

     

       102
       102
       +
                           href = rmatch.group(2)

     

       103
       103
       +
                           fg = f.LinkFragment(start=start, end=start + len(label), url=href)

     

       104
       104
       +
                           fragments.append(fg)

     

       105
       105
       +
                           update_fragments(start, fg, -(end - (start + len(label))))

     

       106
       106
       +
                           new_text += label

     

       107
       107
       +
                       # case "autolink":

     

       108
       108
       +
                       #    url = rmatch.group(0)

     

       109
       109
       +
                       #    fg = f.LinkFragment(start=start, end=end - 2, url=url)

     

       110
       110
       +
                       #    fragments.append(fg)

     

       111
       111
       +
                       #    update_fragments(start, fg, -2)

     

       112
       112
       +
                       #    new_text += url

     

       113
       113
       +
                       case "hashtag":

     

       114
       114
       +
                           tag = rmatch.group(0)

     

       115
       115
       +
                           fragments.append(

     

       116
       116
       +
                               f.TagFragment(

     

       117
       117
       +
                                   start=start,

     

       118
       118
       +
                                   end=end,

     

       119
       119
       +
                                   tag=tag[1:] if tag.startswith("#") else tag,

     

       120
       120
       +
                               )

     

       121
       121
       +
                           )

     

       122
       122
       +
                           new_text += markdown[start:end]

     

       123
       123
       +
                       case "mention":

     

       124
       124
       +
                           mention = rmatch.group(0)

     

       125
       125
       +
                           fragments.append(

     

       126
       126
       +
                               f.MentionFragment(

     

       127
       127
       +
                                   start=start,

     

       128
       128
       +
                                   end=end,

     

       129
       129
       +
                                   uri=mention[1:] if mention.startswith("@") else mention,

     

       130
       130
       +
                               )

     

       131
       131
       +
                           )

     

       132
       132
       +
                           new_text += markdown[start:end]

     

       133
       133
       +
                       case "url":

     

       134
       134
       +
                           url = rmatch.group(0)

     

       135
       135
       +
                           fragments.append(f.LinkFragment(start=start, end=end, url=url))

     

       136
       136
       +
                           new_text += markdown[start:end]

     

       137
       137
       +
                       case _:

     

       138
       138
       +
                           pass

     

       139
       139
       +
                   last_pos = end

     

       140
       140
       +
               if last_pos < len(markdown):

     

       141
       141
       +
                   new_text += markdown[last_pos:]

     

       142
       142
       +
       

     

       143
       143
       +
               return new_text, fragments