social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

feat: strip mfm from crossposted posts, leave a link to the original post. (this code sucks)
fix: html in markdown

zenfyr.dev c004136e 5918f9b4

+11
bluesky/common.py
···
     def get_attachments(self) -> list[MediaInfo]:
         return self.attachments
+    def get_text_type(self) -> str:
+        return "text/plain"
+
+    def get_post_url(self) -> str | None:
+        at_uri: str = self.post['$xpost.strongRef']['uri'][len("at://"):]
+
+        parts = at_uri.split("/")
+        did, _, post_id = parts
+
+        return f"https://bsky.app/profile/{did}/post/{post_id}"
+
 def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
     builder = client_utils.TextBuilder()
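Note: the three-way unpack in get_post_url assumes the strong-ref URI always has exactly the shape at://did/collection/rkey; any other segment count raises ValueError. A more defensive sketch (hypothetical, not what this commit does):

def get_post_url(self) -> str | None:
    uri: str = self.post['$xpost.strongRef']['uri']
    if not uri.startswith("at://"):
        return None
    parts = uri[len("at://"):].split("/")
    if len(parts) != 3:  # expected segments: did, collection, rkey
        return None
    did, _, post_id = parts
    return f"https://bsky.app/profile/{did}/post/{post_id}"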
+8 -1
bluesky/output.py
···
 from bluesky.common import SERVICE, ADULT_PATTERN, PORN_PATTERN, tokens_to_richtext
 import cross, util.database as database
+import misskey.mfm_util as mfm_util
 from util.util import LOGGER, as_envvar
 from util.media import MediaInfo, get_filename_from_url, get_media_meta, compress_image, convert_to_mp4
 from util.database import DataBaseWorker
···
                 f"[{get_filename_from_url(attachment.url)}]"
             ))
             tokens.append(cross.TextToken(' '))
-
+
+        if post.get_text_type() == "text/x.misskeymarkdown":
+            tokens, status = mfm_util.strip_mfm(tokens)
+            post_url = post.get_post_url()
+            if status and post_url:
+                tokens.append(cross.TextToken('\n'))
+                tokens.append(cross.LinkToken(post_url, "[Post contains MFM, see original]"))
         split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
         post_text: list[client_utils.TextBuilder] = []
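Note: a rough walk-through of the block above, with a hypothetical note and instance URL:

tokens = [cross.TextToken('$[tada Hello] world')]
tokens, status = mfm_util.strip_mfm(tokens)  # -> [TextToken('Hello world')], status=True
# status is truthy and get_post_url() returned a URL, so the fallback link is appended:
# [TextToken('Hello world'), TextToken('\n'),
#  LinkToken('https://example.social/notes/abc', '[Post contains MFM, see original]')]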
+10 -83
cross.py
···
 import re
 ALTERNATE = re.compile(r'\S+|\s+')
-URL = re.compile(r'(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+', re.IGNORECASE)
-MD_INLINE_LINK = re.compile(r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)", re.IGNORECASE)
-MD_AUTOLINK = re.compile(r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE)
-HASHTAG = re.compile(r'(?<!\w)\#([\w]+)')
-FEDIVERSE_HANDLE = re.compile(r'(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?')
 # generic token
 class Token():
···
 class Post():
     def __init__(self) -> None:
+        self.now_timestamp = datetime.now(timezone.utc).isoformat()
         pass
     def get_tokens(self) -> list[Token]:
···
         return None
     def get_post_date_iso(self) -> str:
-        return datetime.now(timezone.utc).isoformat()
+        return self.now_timestamp
     def get_attachments(self) -> list[MediaInfo]:
         return []
···
     def is_sensitive(self) -> bool:
         return False
+    # returns input text type.
+    # text/plain, text/markdown, text/x.misskeymarkdown
+    def get_text_type(self) -> str:
+        return 'text/plain'
+
+    def get_post_url(self) -> str | None:
+        return None
+
 # generic input service.
 # user and service for db queries
 class Input():
···
         return False
     return True
-
-def tokenize_markdown(text: str, tags: list[str], handles: list[tuple[str, str]]) -> list[Token]:
-    if not text:
-        return []
-
-    index: int = 0
-    total: int = len(text)
-    buffer: list[str] = []
-
-    tokens: list[Token] = []
-
-    def flush():
-        nonlocal buffer
-        if buffer:
-            tokens.append(TextToken(''.join(buffer)))
-            buffer = []
-
-    while index < total:
-        if text[index] == '[':
-            md_inline = MD_INLINE_LINK.match(text, index)
-            if md_inline:
-                flush()
-                label = md_inline.group(1)
-                href = md_inline.group(2)
-                tokens.append(LinkToken(href, label))
-                index = md_inline.end()
-                continue
-
-        if text[index] == '<':
-            md_auto = MD_AUTOLINK.match(text, index)
-            if md_auto:
-                flush()
-                href = md_auto.group(1)
-                tokens.append(LinkToken(href, href))
-                index = md_auto.end()
-                continue
-
-        if text[index] == '#':
-            tag = HASHTAG.match(text, index)
-            if tag:
-                tag_text = tag.group(1)
-                if tag_text.lower() in tags:
-                    flush()
-                    tokens.append(TagToken(tag_text))
-                    index = tag.end()
-                    continue
-
-        if text[index] == '@':
-            handle = FEDIVERSE_HANDLE.match(text, index)
-            if handle:
-                handle_text = handle.group(0)
-                stripped_handle = handle_text.strip()
-
-                match = next(
-                    (pair for pair in handles if stripped_handle in pair),
-                    None
-                )
-
-                if match:
-                    flush()
-                    tokens.append(MentionToken(match[1], ''))  # TODO: misskey doesn’t provide a uri
-                    index = handle.end()
-                    continue
-
-        url = URL.match(text, index)
-        if url:
-            flush()
-            href = url.group(0)
-            tokens.append(LinkToken(href, href))
-            index = url.end()
-            continue
-
-        buffer.append(text[index])
-        index += 1
-
-    flush()
-    return tokens
 def split_tokens(tokens: list[Token], max_chars: int, max_link_len: int = 35) -> list[list[Token]]:
     def new_block():
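Note: caching now_timestamp in __init__ fixes a subtle inconsistency: the old fallback built a fresh datetime.now() on every call, so repeated reads of get_post_date_iso() on the same post could disagree. Minimal illustration:

post = Post()
# before: two calls could differ by microseconds
# after: the fallback timestamp is fixed at construction time
assert post.get_post_date_iso() == post.get_post_date_iso()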
+10 -6
mastodon/common.py
···
         self.status = status
         self.media_attachments = media_attachments
         self.tokens = tokens
+        self.content_type = status.get('content_type', 'text/plain')
     def get_tokens(self) -> list[cross.Token]:
         return self.tokens
···
         return self.status.get('in_reply_to_id')
     def get_post_date_iso(self) -> str:
-        date = self.status.get('created_at')
-        return date or super().get_post_date_iso()
+        return self.status.get('created_at') or self.now_timestamp
     def get_cw(self) -> str:
         return self.status.get('spoiler_text') or ''
···
         return self.status['id']
     def get_languages(self) -> list[str]:
-        if self.status.get('language'):
-            return [self.status['language']]
-        return []
+        return [self.status['language']] if self.status.get('language') else []
     def is_sensitive(self) -> bool:
         return self.status.get('sensitive', False)
     def get_attachments(self) -> list[MediaInfo]:
-        return self.media_attachments
+        return self.media_attachments
+
+    def get_text_type(self) -> str:
+        return self.content_type
+
+    def get_post_url(self) -> str | None:
+        return self.status.get('url')
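Note: content_type on a status object is a Pleroma/Akkoma-style extension; vanilla Mastodon doesn't send it, hence the 'text/plain' default. A sketch with a hypothetical status dict (constructor argument names assumed from the fields above):

status = {
    'content_type': 'text/x.misskeymarkdown',     # present on Akkoma-likes only
    'url': 'https://example.social/objects/123',  # hypothetical
}
post = MastodonPost(status=status, media_attachments=[], tokens=[])
post.get_text_type()  # 'text/x.misskeymarkdown'
post.get_post_url()   # 'https://example.social/objects/123'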
+13 -13
mastodon/html_util.py → util/html_util.py
···
         self.tokens: list[cross.Token] = []
         self.status: dict
+        self.mentions: list[tuple[str, str]]
+        self.tags: list[str]
+
         self.in_pre = False
         self.in_code = False
···
             self.anchor_data = []
             if anchor_data.startswith('#'):
-                tags: list[dict] = self.status.get('tags', [])
-
                 as_tag = anchor_data[1:].lower()
-                if any(as_tag == block.get('name') for block in tags):
+                if any(as_tag == block for block in self.tags):
                     self.tokens.append(cross.TagToken(anchor_data[1:]))
             elif anchor_data.startswith('@'):
-                mentions: list[dict] = self.status.get('mentions', [])
+                match = next(
+                    (pair for pair in self.mentions if anchor_data in pair),
+                    None
+                )
-                as_mention = anchor_data[1:]
-                for block in mentions:
-                    if href == block.get('url'):
-                        self.tokens.append(cross.MentionToken(block['acct'], block['url']))
-                        break
-                    elif as_mention == block.get('acct') or as_mention == block.get('username'):
-                        self.tokens.append(cross.MentionToken(block['acct'], block['url']))
-                        break
+                if match:
+                    self.tokens.append(cross.MentionToken(match[1], ''))
             else:
                 self.tokens.append(cross.LinkToken(href, anchor_data))
···
         """Reset the parser state for reuse."""
         super().reset()
         self.tokens = []
-        self.status = {}
+
+        self.mentions = []
+        self.tags = []
         self.in_pre = False
         self.in_code = False
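Note: the tokenizer no longer digs through a raw status dict; callers now inject pre-normalized (username, acct) mention pairs and lowercase tag names, which is what lets util/md_util.py reuse the same parser below. Typical wiring (mirrors mastodon/input.py; values hypothetical):

tokenizer = html_util.HTMLPostTokenizer()
tokenizer.mentions = [('@alice', '@alice@example.social')]  # (username, acct) pairs
tokenizer.tags = ['photography']                            # lowercase tag names
tokenizer.feed('<p>hi <a href="https://example.social/@alice">@alice</a> #photography</p>')
tokens = tokenizer.get_tokens()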
+6 -4
mastodon/input.py
···
 import asyncio
 from mastodon.common import MastodonPost
-import mastodon.html_util as html_util
+import util.html_util as html_util
+import util.md_util as md_util
 import cross, util.database as database
 from util.util import LOGGER, as_envvar
···
         mentions.append(('@' + mention['username'], '@' + mention['acct']))
     if raw_text and content_type in MARKDOWNY:
-        return cross.tokenize_markdown(raw_text, tags, mentions)
+        return md_util.tokenize_markdown(raw_text, tags, mentions)
     akkoma_ext: dict | None = status.get('akkoma', {}).get('source')
     if akkoma_ext:
         if akkoma_ext.get('mediaType') in MARKDOWNY:
-            return cross.tokenize_markdown(akkoma_ext["content"], tags, mentions)
+            return md_util.tokenize_markdown(akkoma_ext["content"], tags, mentions)
     tokenizer = html_util.HTMLPostTokenizer()
-    tokenizer.status = status
+    tokenizer.mentions = mentions
+    tokenizer.tags = tags
     tokenizer.feed(status.get('content', ""))
     return tokenizer.get_tokens()
+11 -2
mastodon/output.py
···
 import requests, time
 import cross, util.database as database
+import misskey.mfm_util as mfm_util
 from util.util import LOGGER, as_envvar, canonical_label
 from util.media import MediaInfo
 from util.database import DataBaseWorker
···
             lang = post.get_languages()[0]
         else:
             lang = 'en'
-
-        raw_statuses = self.split_tokens_media(post.get_tokens(), post.get_attachments())
+
+        post_tokens = post.get_tokens()
+        if post.get_text_type() == "text/x.misskeymarkdown":
+            post_tokens, status = mfm_util.strip_mfm(post_tokens)
+            post_url = post.get_post_url()
+            if status and post_url:
+                post_tokens.append(cross.TextToken('\n'))
+                post_tokens.append(cross.LinkToken(post_url, "[Post contains MFM, see original]"))
+
+        raw_statuses = self.split_tokens_media(post_tokens, post.get_attachments())
         if not raw_statuses:
             LOGGER.error("Failed to split post into statuses?")
             return None
+9 -2
misskey/common.py
···
 from util.media import MediaInfo
 class MisskeyPost(cross.Post):
-    def __init__(self, note: dict, tokens: list[cross.Token], files: list[MediaInfo]) -> None:
+    def __init__(self, instance_url: str, note: dict, tokens: list[cross.Token], files: list[MediaInfo]) -> None:
         super().__init__()
         self.note = note
         self.sensitive = any([a.get('isSensitive', False) for a in note.get('files', [])])
         self.media_attachments = files
         self.tokens = tokens
+        self.url = instance_url + '/notes/' + note['id']
     def get_tokens(self) -> list[cross.Token]:
         return self.tokens
···
         return []
     def is_sensitive(self) -> bool:
-        return self.sensitive
+        return self.sensitive
+
+    def get_text_type(self) -> str:
+        return "text/x.misskeymarkdown"
+
+    def get_post_url(self) -> str | None:
+        return self.url
+3 -2
misskey/input.py
···
 from misskey.common import MisskeyPost
 import cross, util.database as database
+import util.md_util as md_util
 from util.media import MediaInfo, download_media
 from util.util import LOGGER, as_envvar
···
     for key, value in mention_handles.items():
         handles.append((value, value))
-    tokens = cross.tokenize_markdown(note.get('text', ''), tags, handles)
+    tokens = md_util.tokenize_markdown(note.get('text', ''), tags, handles)
     if not cross.test_filters(tokens, self.options.filters):
         LOGGER.info("Skipping '%s'. Matched a filter!", note['id'])
         return
···
             return
         media_attachments.append(info)
-    cross_post = MisskeyPost(note, tokens, media_attachments)
+    cross_post = MisskeyPost(self.service, note, tokens, media_attachments)
     for output in outputs:
         output.accept_post(cross_post)
+35
misskey/mfm_util.py
···
+import re, cross
+
+MFM_PATTERN = re.compile(r'\$\[([^\[\]]+)\]')
+
+def strip_mfm(tokens: list[cross.Token]) -> tuple[list[cross.Token], bool]:
+    modified = False
+
+    for tk in tokens:
+        if isinstance(tk, cross.TextToken):
+            original = tk.text
+            cleaned = __strip_mfm(original)
+            if cleaned != original:
+                modified = True
+                tk.text = cleaned
+
+        elif isinstance(tk, cross.LinkToken):
+            original = tk.label
+            cleaned = __strip_mfm(original)
+            if cleaned != original:
+                modified = True
+                tk.label = cleaned
+
+    return tokens, modified
+
+def __strip_mfm(text: str) -> str:
+    def match_contents(match: re.Match[str]):
+        content = match.group(1).strip()
+        parts = content.split(' ', 1)
+        return parts[1] if len(parts) > 1 else ''
+
+    while MFM_PATTERN.search(text):
+        text = MFM_PATTERN.sub(match_contents, text)
+
+    return text
+
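Note: MFM_PATTERN only matches an innermost $[...] group (no brackets in the body), so the while loop peels nested decorations one layer per pass, and match_contents drops the effect name while keeping the decorated text. For example:

tokens = [cross.TextToken('$[x2 $[sparkle big]] text')]
tokens, modified = mfm_util.strip_mfm(tokens)
# pass 1 resolves the inner group:  '$[x2 big] text'
# pass 2 resolves the outer group:  'big text'
# -> tokens == [TextToken('big text')], modified == True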
+112
util/md_util.py
···
+import re
+
+import cross
+import util.html_util as html_util
+import util.util as util
+
+URL = re.compile(r'(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+', re.IGNORECASE)
+MD_INLINE_LINK = re.compile(r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)", re.IGNORECASE)
+MD_AUTOLINK = re.compile(r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE)
+HASHTAG = re.compile(r'(?<!\w)\#([\w]+)')
+FEDIVERSE_HANDLE = re.compile(r'(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?')
+
+def tokenize_markdown(text: str, tags: list[str], handles: list[tuple[str, str]]) -> list[cross.Token]:
+    if not text:
+        return []
+
+    tokenizer = html_util.HTMLPostTokenizer()
+    tokenizer.mentions = handles
+    tokenizer.tags = tags
+    tokenizer.feed(text)
+    html_tokens = tokenizer.get_tokens()
+
+    tokens: list[cross.Token] = []
+
+    for tk in html_tokens:
+        if isinstance(tk, cross.TextToken):
+            tokens.extend(__tokenize_md(tk.text, tags, handles))
+        elif isinstance(tk, cross.LinkToken):
+            if not tk.label or util.canonical_label(tk.label, tk.href):
+                tokens.append(tk)
+                continue
+
+            tokens.extend(__tokenize_md(f"[{tk.label}]({tk.href})", tags, handles))
+        else:
+            tokens.append(tk)
+
+    return tokens
+
+
+def __tokenize_md(text: str, tags: list[str], handles: list[tuple[str, str]]) -> list[cross.Token]:
+    index: int = 0
+    total: int = len(text)
+    buffer: list[str] = []
+
+    tokens: list[cross.Token] = []
+
+    def flush():
+        nonlocal buffer
+        if buffer:
+            tokens.append(cross.TextToken(''.join(buffer)))
+            buffer = []
+
+    while index < total:
+        if text[index] == '[':
+            md_inline = MD_INLINE_LINK.match(text, index)
+            if md_inline:
+                flush()
+                label = md_inline.group(1)
+                href = md_inline.group(2)
+                tokens.append(cross.LinkToken(href, label))
+                index = md_inline.end()
+                continue
+
+        if text[index] == '<':
+            md_auto = MD_AUTOLINK.match(text, index)
+            if md_auto:
+                flush()
+                href = md_auto.group(1)
+                tokens.append(cross.LinkToken(href, href))
+                index = md_auto.end()
+                continue
+
+        if text[index] == '#':
+            tag = HASHTAG.match(text, index)
+            if tag:
+                tag_text = tag.group(1)
+                if tag_text.lower() in tags:
+                    flush()
+                    tokens.append(cross.TagToken(tag_text))
+                    index = tag.end()
+                    continue
+
+        if text[index] == '@':
+            handle = FEDIVERSE_HANDLE.match(text, index)
+            if handle:
+                handle_text = handle.group(0)
+                stripped_handle = handle_text.strip()
+
+                match = next(
+                    (pair for pair in handles if stripped_handle in pair),
+                    None
+                )
+
+                if match:
+                    flush()
+                    tokens.append(cross.MentionToken(match[1], ''))  # TODO: misskey doesn’t provide a uri
+                    index = handle.end()
+                    continue
+
+        url = URL.match(text, index)
+        if url:
+            flush()
+            href = url.group(0)
+            tokens.append(cross.LinkToken(href, href))
+            index = url.end()
+            continue
+
+        buffer.append(text[index])
+        index += 1
+
+    flush()
+    return tokens
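Note: tokenize_markdown is two-stage: the HTML pass strips markup first (the "fix: html in markdown" part of this commit), then __tokenize_md re-scans the surviving text and non-canonical link labels for markdown links, autolinks, hashtags, fediverse handles, and bare URLs. A hypothetical call (token boundaries are approximate; they depend on the HTML pass):

tokens = md_util.tokenize_markdown(
    'see <b>this</b> [post](https://example.social/notes/1) #news',
    tags=['news'],
    handles=[('@alice', '@alice@example.social')],
)
# -> roughly [TextToken('see this '), LinkToken('https://example.social/notes/1', 'post'),
#            TextToken(' '), TagToken('news')]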