···
1
+
#!/usr/bin/env python3
8
+
from datetime import datetime, timezone
9
+
from firehose_utils import commit_ops
10
+
from io import BytesIO
12
+
# Collection names that each op's collection is tested against in the
# consume loop (`collection not in app_bsky_allowlist`).
# NOTE(review): these are the entries visible in this view — confirm that
# nothing further follows 'app.bsky.labeler.service'.
app_bsky_allowlist = {
    'app.bsky.actor.profile',
    'app.bsky.feed.generator',
    'app.bsky.feed.like',
    'app.bsky.feed.post',
    'app.bsky.feed.repost',
    'app.bsky.feed.threadgate',
    'app.bsky.graph.block',
    'app.bsky.graph.follow',
    'app.bsky.graph.list',
    'app.bsky.graph.listblock',
    'app.bsky.graph.listitem',
    'app.bsky.labeler.service',
}
28
+
# Redis handles used below: a client, a pipeline that queues commands until
# execute() is called, and a pub/sub object for reading the firehose channel.
redis_cnx = redis.Redis()  # no arguments: the client library's default connection settings
29
+
redis_pipe = redis_cnx.pipeline()
30
+
redis_sub = redis_cnx.pubsub(ignore_subscribe_messages=True)  # subscribe/unsubscribe confirmations are not delivered to the listener
32
+
# NOTE(review): db_fname appears to be assigned twice back to back; if so the
# second assignment wins and '/opt/muninsky/users.db' is never used — confirm
# whether a condition was meant to select between the two paths.
db_fname = '/opt/muninsky/users.db'
33
+
db_fname = 'users.db'  # relative path: resolved against the current working directory
35
+
db_cnx = sqlite3.connect(db_fname)  # sqlite3 creates the database file if it does not exist yet
37
+
# Schema bootstrap (every CREATE uses IF NOT EXISTS, so existing objects are
# left alone):
#   - sets journal_mode to WAL and synchronous to off,
#   - users(did, ts), with did_idx UNIQUE on did — the conflict target of the
#     `on conflict (did)` upsert issued in the consume loop,
#   - ts_idx on the ts column.
db_cnx.executescript("""
38
+
PRAGMA journal_mode = WAL;
39
+
PRAGMA synchronous = off;
40
+
CREATE TABLE IF NOT EXISTS users (did TEXT, ts TIMESTAMP);
41
+
CREATE UNIQUE INDEX IF NOT EXISTS did_idx on users(did);
42
+
CREATE INDEX IF NOT EXISTS ts_idx on users(ts);
46
+
# Consume loop: read messages from the 'bsky-tools:firehose:stream' channel,
# decode each one as a DAG-CBOR header followed by a payload, upsert
# (did, ts) into users, and every 500 ops checkpoint the payload seq.
redis_sub.subscribe('bsky-tools:firehose:stream')  # Redis pub/sub channel this consumer reads from
47
+
for event in redis_sub.listen():  # iterates over incoming pub/sub messages
48
+
frame = BytesIO(event['data'])  # file-like view of the message bytes so two values can be decoded in sequence
49
+
header = dag_cbor.decode(frame, allow_concat=True)  # first value in the frame; allow_concat presumably tolerates the trailing payload bytes — confirm
50
+
if header['op'] != 1 or header['t'] != '#commit':  # true for anything that is not an op-1 '#commit' frame
53
+
payload = dag_cbor.decode(frame)  # second value decoded from the same stream
54
+
if payload['tooBig']:  # payload carries a truthy tooBig flag
55
+
# TODO(ejd): how should these be handled?
58
+
for op in commit_ops(payload):  # commit_ops comes from firehose_utils; each op is indexed by 'action' and 'path' below
59
+
if op['action'] != 'create':  # true for every op other than a 'create'
62
+
collection, _ = op['path'].split('/')  # expects exactly one '/'; any other count raises ValueError on unpacking
63
+
if collection not in app_bsky_allowlist:  # true for collections outside the allowlist
66
+
repo_did = payload['repo']  # value stored in users.did
67
+
ts = datetime.now(timezone.utc).timestamp()  # current UTC time as a POSIX timestamp (float seconds)
69
+
'insert into users values (:did, :ts) on conflict (did) do update set ts = :ts',  # upsert keyed on did: new row, or refresh ts on an existing one
70
+
{'did': repo_did, 'ts': ts}  # named parameters bound to :did and :ts
75
+
.incr('dev.edavis.muninsky.ops')  # presumably a Redis INCR on this key — receiver not shown here; confirm
78
+
if op_count % 500 == 0:  # true whenever op_count is a multiple of 500
79
+
payload_seq = payload['seq']  # 'seq' field of the current payload
80
+
sys.stdout.write(f'checkpoint: seq: {payload_seq}\n')  # progress line on stdout
81
+
redis_pipe.set('dev.edavis.muninsky.seq', payload_seq)  # queue the latest seq on the pipeline under this key
82
+
redis_pipe.execute()  # send the queued pipeline commands to Redis
86
+
if __name__ == '__main__':