
feat: resurrect most-liked

Changed files (+120 -59)
feedgen.py (+2 -5)
···
import websockets
from feed_manager import feed_manager
-from firehose_manager import FirehoseManager
logging.basicConfig(
    format='%(asctime)s - %(levelname)-5s - %(name)-20s - %(message)s',
···
logging.getLogger('feeds').setLevel(logging.DEBUG)
logging.getLogger('firehose').setLevel(logging.DEBUG)
-async def firehose_events(firehose_manager):
+async def firehose_events():
    relay_url = 'ws://localhost:6008/subscribe'
    logger = logging.getLogger('feeds.events')
···
        yield json.loads(payload)
async def main():
-    firehose_manager = FirehoseManager()
    event_count = 0
-    async for commit in firehose_events(firehose_manager):
+    async for commit in firehose_events():
        feed_manager.process_commit(commit)
        event_count += 1
        if event_count % 2500 == 0:
            feed_manager.commit_changes()
-            firehose_manager.set_sequence_number(commit['seq'])

if __name__ == '__main__':
    asyncio.run(main())
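For context: with FirehoseManager gone, firehose_events() no longer receives a cursor, so the generator presumably reduces to a plain subscribe-and-yield loop over the relay websocket. A minimal sketch of that shape, assuming the websockets client API implied by the imports (the reconnect handling is an illustration, not part of this diff):

    import asyncio
    import json
    import logging

    import websockets

    async def firehose_events():
        relay_url = 'ws://localhost:6008/subscribe'
        logger = logging.getLogger('feeds.events')
        while True:
            try:
                # websockets yields one frame per message; each frame is a JSON commit
                async with websockets.connect(relay_url) as ws:
                    async for payload in ws:
                        yield json.loads(payload)
            except websockets.ConnectionClosedError:
                logger.warning('relay connection closed, reconnecting')
                await asyncio.sleep(1)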
feeds/mostliked.py (+118 -24)
···
import apsw
import apsw.ext
+import threading
+import queue
from . import BaseFeed

+class DatabaseWorker(threading.Thread):
+    def __init__(self, name, db_path, task_queue):
+        super().__init__()
+        self.db_cnx = apsw.Connection(db_path)
+        self.db_cnx.pragma('foreign_keys', True)
+        self.db_cnx.pragma('journal_mode', 'WAL')
+        self.db_cnx.pragma('wal_autocheckpoint', '0')
+        self.stop_signal = False
+        self.task_queue = task_queue
+        self.logger = logging.getLogger(f'feeds.db.{name}')
+        self.changes = 0
+
+    def run(self):
+        while not self.stop_signal:
+            task = self.task_queue.get(block=True)
+            if task == 'STOP':
+                self.stop_signal = True
+            elif task == 'COMMIT':
+                self.logger.debug(f'committing {self.changes} changes')
+                if self.db_cnx.in_transaction:
+                    self.db_cnx.execute('COMMIT')
+                    checkpoint = self.db_cnx.execute('PRAGMA wal_checkpoint(PASSIVE)')
+                    self.logger.debug(f'checkpoint: {checkpoint.fetchall()!r}')
+                self.changes = 0
+                self.logger.debug(f'qsize: {self.task_queue.qsize()}')
+            else:
+                sql, bindings = task
+                if not self.db_cnx.in_transaction:
+                    self.db_cnx.execute('BEGIN')
+                self.db_cnx.execute(sql, bindings)
+                self.changes += self.db_cnx.changes()
+            self.task_queue.task_done()
+        self.db_cnx.close()
+
+    def stop(self):
+        self.task_queue.put('STOP')
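The worker above is driven entirely through its queue: writes arrive as (sql, bindings) tuples and are batched into one open transaction, and the sentinels 'COMMIT' and 'STOP' flush and shut down. A minimal usage sketch (names and values here are illustrative, not from the commit):

    import queue

    writes = queue.Queue()
    worker = DatabaseWorker('example', 'db/example.db', writes)
    worker.start()

    # queue a write; the worker opens a transaction lazily on the first task
    writes.put((
        'insert or ignore into posts (uri, create_ts, likes) values (:uri, :ts, 0)',
        {'uri': 'at://did:plc:example/app.bsky.feed.post/xyz', 'ts': 0},
    ))

    writes.put('COMMIT')  # commit the batch and passively checkpoint the WAL
    worker.stop()         # enqueues 'STOP'; run() closes the connection and returns
    worker.join()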
+
class MostLikedFeed(BaseFeed):
    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/most-liked'
-    SERVE_FEED_QUERY = """
-        select uri, create_ts, unixepoch('now', '-24 hours'), create_ts - unixepoch('now', '-24 hours'), likes
-        from posts
-        where create_ts >= unixepoch('now', '-24 hours')
-        order by likes desc, create_ts asc
-        limit :limit offset :offset
-    """
    DELETE_OLD_POSTS_QUERY = """
-        delete from posts
-        where create_ts < unixepoch('now', '-24 hours')
+        delete from posts where (
+            create_ts < unixepoch('now', '-15 minutes') and likes < 2
+        ) or create_ts < unixepoch('now', '-24 hours');
    """

    def __init__(self):
        self.db_cnx = apsw.Connection('db/mostliked.db')
+        self.db_cnx.pragma('foreign_keys', True)
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')
        with self.db_cnx:
            self.db_cnx.execute("""
                create table if not exists posts (
-                    uri text, create_ts timestamp, likes int
+                    uri text primary key,
+                    create_ts timestamp,
+                    likes int
+                );
+                create table if not exists langs (
+                    uri text,
+                    lang text,
+                    foreign key(uri) references posts(uri) on delete cascade
                );
-                create unique index if not exists uri_idx on posts(uri);
-                create index if not exists create_ts_idx on posts(create_ts);
+                create index if not exists ts_idx on posts(create_ts);
            """)
        self.logger = logging.getLogger('feeds.mostliked')
+        self.db_writes = queue.Queue()
+        db_worker = DatabaseWorker('mostliked', 'db/mostliked.db', self.db_writes)
+        db_worker.start()
+
    def process_commit(self, commit):
-        return
+        if commit['opType'] != 'c':
+            return
-    def delete_old_posts(self):
-        self.db_cnx.execute(self.DELETE_OLD_POSTS_QUERY)
-        self.logger.debug('deleted {} old posts'.format(self.db_cnx.changes()))
+        if commit['collection'] == 'app.bsky.feed.post':
+            record = commit.get('record')
+            post_uri = f"at://{commit['did']}/app.bsky.feed.post/{commit['rkey']}"
+            task = (
+                'insert or ignore into posts (uri, create_ts, likes) values (:uri, :ts, 0)',
+                {'uri': post_uri, 'ts': self.safe_timestamp(record.get('createdAt')).timestamp()}
+            )
+            self.db_writes.put(task)
+
+            langs = record.get('langs', [])
+            for lang in langs:
+                task = (
+                    'insert or ignore into langs (uri, lang) values (:uri, :lang)',
+                    {'uri': post_uri, 'lang': lang}
+                )
+                self.db_writes.put(task)
+
+        elif commit['collection'] == 'app.bsky.feed.like':
+            record = commit.get('record')
+            try:
+                subject_uri = record['subject']['uri']
+            except KeyError:
+                return
+
+            task = (
+                'update posts set likes = likes + 1 where uri = :uri',
+                {'uri': subject_uri}
+            )
+            self.db_writes.put(task)

    def commit_changes(self):
-        self.delete_old_posts()
-        self.logger.debug('committing changes')
-        self.transaction_commit(self.db_cnx)
-        self.wal_checkpoint(self.db_cnx, 'RESTART')
+        self.db_writes.put((self.DELETE_OLD_POSTS_QUERY, {}))
+        self.db_writes.put('COMMIT')
+
+    def generate_sql(self, limit, offset, langs):
+        bindings = []
+        sql = """
+            select posts.uri, create_ts, create_ts - unixepoch('now', '-15 minutes') as rem, likes, lang
+            from posts
+            left join langs on posts.uri = langs.uri
+            where
+        """
+        if '*' not in langs:
+            lang_values = list(langs.values())
+            bindings.extend(lang_values)
+            sql += " OR ".join(['lang = ?'] * len(lang_values))
+        else:
+            sql += " 1=1 "
+        sql += """
+            order by likes desc, create_ts desc
+            limit ? offset ?
+        """
+        bindings.extend([limit, offset])
+        return sql, bindings

    def serve_feed(self, limit, offset, langs):
-        return [
-            'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.post/3l3cgg5vbc72i'
-        ]
+        sql, bindings = self.generate_sql(limit, offset, langs)
+        cur = self.db_cnx.execute(sql, bindings)
+        return [row[0] for row in cur]

    def serve_feed_debug(self, limit, offset, langs):
-        pass
+        sql, bindings = self.generate_sql(limit, offset, langs)
+        return apsw.ext.format_query_table(
+            self.db_cnx, sql, bindings,
+            string_sanitize=2, text_width=9999, use_unicode=True
+        )
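For reference, generate_sql() assembles the query from three pieces (select/join, language filter, order/limit) with positional ? placeholders: a wildcard degenerates the filter to 1=1, otherwise one lang = ? per requested language, OR-joined. An illustrative call (the shape of the langs dict is an assumption about what the HTTP layer passes in):

    sql, bindings = feed.generate_sql(limit=10, offset=0, langs={'lang0': 'en', 'lang1': 'de'})
    # sql is roughly:
    #   select posts.uri, create_ts, create_ts - unixepoch('now', '-15 minutes') as rem, likes, lang
    #   from posts
    #   left join langs on posts.uri = langs.uri
    #   where lang = ? OR lang = ?
    #   order by likes desc, create_ts desc
    #   limit ? offset ?
    # bindings == ['en', 'de', 10, 0]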
firehose_manager.py (-30)
···
-import logging
-
-import apsw
-
-class FirehoseManager:
-    def __init__(self, fname='db/firehose.db'):
-        self.db_cnx = apsw.Connection(fname)
-        self.db_cnx.pragma('journal_mode', 'WAL')
-        with self.db_cnx:
-            self.db_cnx.execute("create table if not exists firehose(key text unique, value text)")
-
-        self.logger = logging.getLogger('firehose.manager')
-
-    def get_sequence_number(self):
-        row = self.db_cnx.execute("select * from firehose where key = 'seq'").fetchone()
-        if row is None:
-            return None
-        (key, value) = row
-        return int(value)
-
-    def set_sequence_number(self, value):
-        self.logger.debug(f'setting sequence number = {value}')
-
-        with self.db_cnx:
-            self.db_cnx.execute(
-                "insert into firehose (key, value) values ('seq', :value) on conflict(key) do update set value = :value",
-                dict(value=value)
-            )
-
-        self.db_cnx.pragma('wal_checkpoint(RESTART)')