social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

parse raw text if available

zenfyr.dev f361003e 8bd14026

+82
cross.py
···
import re
ALTERNATE = re.compile(r'\S+|\s+')
+URL = re.compile(r'(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+', re.IGNORECASE)
+MD_INLINE_LINK = re.compile(r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)", re.IGNORECASE)
+MD_AUTOLINK = re.compile(r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE)
+HASHTAG = re.compile(r'(?<!\w)\#([\w]+)')
+FEDIVERSE_HANDLE = re.compile(r'(?<![\w@])@([\w-]+)(?:@([\w\.-]+\.[\w\.-]+))?')
# generic token
class Token():
···
    def delete_quote(self, quote_id: str):
        LOGGER.warning('Not Implemented.. "removed quote" %s', quote_id)
+
+def tokenize_markdown(text: str, tags: list[str], handles: list[tuple[str, str]]) -> list[Token]:
+    if not text:
+        return []
+
+    index: int = 0
+    total: int = len(text)
+    buffer: list[str] = []
+
+    tokens: list[Token] = []
+
+    def flush():
+        nonlocal buffer
+        if buffer:
+            tokens.append(TextToken(''.join(buffer)))
+            buffer = []
+
+    while index < total:
+        if text[index] == '[':
+            md_inline = MD_INLINE_LINK.match(text, index)
+            if md_inline:
+                flush()
+                label = md_inline.group(1)
+                href = md_inline.group(2)
+                tokens.append(LinkToken(href, label))
+                index = md_inline.end()
+                continue
+
+        if text[index] == '<':
+            md_auto = MD_AUTOLINK.match(text, index)
+            if md_auto:
+                flush()
+                href = md_auto.group(1)
+                tokens.append(LinkToken(href, href))
+                index = md_auto.end()
+                continue
+
+        if text[index] == '#':
+            tag = HASHTAG.match(text, index)
+            if tag:
+                tag_text = tag.group(1)
+                if tag_text.lower() in tags:
+                    flush()
+                    tokens.append(TagToken(tag_text))
+                    index = tag.end()
+                    continue
+
+        if text[index] == '@':
+            handle = FEDIVERSE_HANDLE.match(text, index)
+            if handle:
+                handle_text = handle.group(0)
+                stripped_handle = handle_text.strip()
+
+                match = next(
+                    (pair for pair in handles if stripped_handle in pair),
+                    None
+                )
+
+                if match:
+                    flush()
+                    tokens.append(MentionToken(match[1], ''))  # TODO: misskey doesn't provide a uri
+                    index = handle.end()
+                    continue
+
+        url = URL.match(text, index)
+        if url:
+            flush()
+            href = url.group(0)
+            tokens.append(LinkToken(href, href))
+            index = url.end()
+            continue
+
+        buffer.append(text[index])
+        index += 1
+
+    flush()
+    return tokens

def split_tokens(tokens: list[Token], max_chars: int, max_link_len: int = 35) -> list[list[Token]]:
    def start_new_block():
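
A quick sketch of how the shared tokenizer behaves, with an invented sample string, tag list, and handle pair (token constructors as used above):

text = 'hi @alice@example.social, read [docs](https://example.com/docs) #news'
tokens = tokenize_markdown(text, ['news'], [('@alice', '@alice@example.social')])
# -> TextToken('hi ')
#    MentionToken('@alice@example.social', '')  # matched against either tuple slot
#    TextToken(', read ')
#    LinkToken('https://example.com/docs', 'docs')
#    TextToken(' ')
#    TagToken('news')

Note that tags are compared lowercased (tag_text.lower() in tags), so the tag list is expected to already be lowercase.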
+30 -1
mastodon.py
···
        recurse(child)
    return tokens
+
+MARKDOWNY = ['text/x.misskeymarkdown', 'text/markdown', 'text/plain']
class MastodonPost(cross.Post):
    def __init__(self, status: dict, media_attachments: list[media_util.MediaInfo]) -> None:
        super().__init__()
        self.status = status
        self.media_attachments = media_attachments
-        self.tokens = tokenize_post(status)
+        self.tokens = self.__to_tokens()
+
+
+    def __to_tokens(self):
+        content_type = self.status.get('content_type', 'text/plain')
+        raw_text = self.status.get('text')
+
+        tags: list[str] = []
+        for tag in self.status.get('tags', []):
+            tags.append(tag['name'])
+
+        mentions: list[tuple[str, str]] = []
+        for mention in self.status.get('mentions', []):
+            mentions.append(('@' + mention['username'], '@' + mention['acct']))
+
+        if raw_text and content_type in MARKDOWNY:
+            return cross.tokenize_markdown(raw_text, tags, mentions)
+
+        pleroma_ext: dict | None = self.status.get('pleroma', {}).get('content')
+        if pleroma_ext:
+            for ctype in MARKDOWNY:
+                if ctype not in pleroma_ext:
+                    continue
+
+                return cross.tokenize_markdown(pleroma_ext[ctype], tags, mentions)
+
+        return tokenize_post(self.status)
+
    def get_tokens(self) -> list[cross.Token]:
        return self.tokens
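
For reference, the two status shapes __to_tokens prefers, sketched with invented values; content_type and pleroma.content are Pleroma/Akkoma extensions, while tags/mentions follow the Mastodon entity layout the loops above expect:

status = {
    'text': 'hello #fedi',            # raw source, when the server exposes it
    'content_type': 'text/markdown',  # Pleroma/Akkoma extension field
    'tags': [{'name': 'fedi'}],
    'mentions': [],
}
# -> cross.tokenize_markdown('hello #fedi', ['fedi'], [])

status = {
    'pleroma': {'content': {'text/plain': 'hello #fedi'}},  # mimetype -> rendering
    'tags': [{'name': 'fedi'}],
    'mentions': [],
}
# -> first MARKDOWNY mimetype present wins

Anything else still goes through the old HTML path, tokenize_post(status).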
+10 -85
misskey.py
···
import cross, media_util, util, database
from util import LOGGER
import requests, websockets
-import re
from typing import Callable, Any
import asyncio
import json, uuid
-
-URL = re.compile(r'(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+', re.IGNORECASE)
-MD_INLINE_LINK = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
-MD_AUTOLINK = re.compile(r"<((?:https?://[^\s>]+|mailto:[^\s>]+))>")
-HASHTAG = re.compile(r'(?<!\w)\#([\w]+)')
-FEDIVERSE_HANDLE = re.compile(r'(?<![\w@])@([\w-]+)(?:@([\w\.-]+\.[\w\.-]+))?')
-
-def tokenize_note(note: dict) -> list[cross.Token]:
-    text: str = note.get('text', '')
-    if not text:
-        return []
-    mention_handles: dict = note.get('mentionHandles') or {}
-    tags: list[str] = note.get('tags') or []
-
-    handles: list[str] = []
-    for key, value in mention_handles.items():
-        handles.append(value)
-
-    index: int = 0
-    total: int = len(text)
-    buffer: list[str] = []
-
-    tokens: list[cross.Token] = []
-
-    def flush():
-        nonlocal buffer
-        if buffer:
-            tokens.append(cross.TextToken(''.join(buffer)))
-            buffer = []
-
-    while index < total:
-        if text[index] == '[':
-            md_inline = MD_INLINE_LINK.match(text, index)
-            if md_inline:
-                flush()
-                label = md_inline.group(1)
-                href = md_inline.group(2)
-                tokens.append(cross.LinkToken(href, label))
-                index = md_inline.end()
-                continue
-
-        if text[index] == '<':
-            md_auto = MD_AUTOLINK.match(text, index)
-            if md_auto:
-                flush()
-                href = md_auto.group(1)
-                tokens.append(cross.LinkToken(href, href))
-                index = md_auto.end()
-                continue
-
-        if text[index] == '#':
-            tag = HASHTAG.match(text, index)
-            if tag:
-                tag_text = tag.group(1)
-                if tag_text.lower() in tags:
-                    flush()
-                    tokens.append(cross.TagToken(tag_text))
-                    index = tag.end()
-                    continue
-
-        if text[index] == '@':
-            handle = FEDIVERSE_HANDLE.match(text, index)
-            if handle:
-                handle_text = handle.group(0)
-                if handle_text.strip() in handles:
-                    flush()
-                    tokens.append(cross.MentionToken(handle_text, ''))  # TODO misskey doesn't provide a uri
-                    index = handle.end()
-                    continue
-
-        url = URL.match(text, index)
-        if url:
-            flush()
-            href = url.group(0)
-            tokens.append(cross.LinkToken(href, href))
-            index = url.end()
-            continue
-
-        buffer.append(text[index])
-        index += 1
-
-    flush()
-    return tokens
+
class MisskeyPost(cross.Post):
    def __init__(self, note: dict, files: list[media_util.MediaInfo]) -> None:
···
        self.note = note
        self.sensitive = any([a.get('isSensitive', False) for a in note.get('files', [])])
        self.media_attachments = files
-        self.tokens = tokenize_note(self.note)
+
+        mention_handles: dict = note.get('mentionHandles') or {}
+        tags: list[str] = note.get('tags') or []
+
+        handles: list[tuple[str, str]] = []
+        for key, value in mention_handles.items():
+            handles.append((value, value))
+
+        self.tokens = cross.tokenize_markdown(note.get('text', ''), tags, handles)

    def get_tokens(self) -> list[cross.Token]:
        return self.tokens
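
And the Misskey side, sketched with an invented note payload (the elided parts of __init__ may read more fields); mentionHandles, as read here, maps a user id to a full handle, and both tuple slots get the same string, so the emitted MentionToken text is always the full handle:

note = {
    'text': 'cc @bob@remote.tld',
    'tags': [],
    'mentionHandles': {'9abc123': '@bob@remote.tld'},  # id -> handle
}
# __init__ builds handles == [('@bob@remote.tld', '@bob@remote.tld')], then
# cross.tokenize_markdown(...) yields [TextToken('cc '), MentionToken('@bob@remote.tld', '')]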
-2
util.py
···
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")
-import json
-
def as_json(obj, indent=None,sort_keys=False) -> str:
    return json.dumps(
        obj.__dict__ if not isinstance(obj, dict) else obj,