this repo has no description

Add feedgen/feedweb

+1
.gitignore
···
···
+
db/
+1
Pipfile
···
atproto = "*"
flask = "*"
requests = "*"
[dev-packages]
···
atproto = "*"
flask = "*"
requests = "*"
+
gunicorn = "*"
[dev-packages]
+22 -4
Pipfile.lock
···
{
"_meta": {
"hash": {
-
"sha256": "4c979af70167ffd0e10feab94039bc4cd6c633eafafc0ffc8fe610de279023a7"
},
"pipfile-spec": 6,
"requires": {
···
"markers": "python_version >= '3.8'",
"version": "==3.0.2"
},
"h11": {
"hashes": [
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
···
"markers": "python_version >= '3.7'",
"version": "==0.3.1"
},
"pycparser": {
"hashes": [
-
"sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9",
-
"sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"
],
-
"version": "==2.21"
},
"pydantic": {
"hashes": [
···
{
"_meta": {
"hash": {
+
"sha256": "35b6fce04f6f842ebca9cbcdd66f681bb94b7a913f3de9f67082df4986393af1"
},
"pipfile-spec": 6,
"requires": {
···
"markers": "python_version >= '3.8'",
"version": "==3.0.2"
},
+
"gunicorn": {
+
"hashes": [
+
"sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0",
+
"sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"
+
],
+
"index": "pypi",
+
"markers": "python_version >= '3.5'",
+
"version": "==21.2.0"
+
},
"h11": {
"hashes": [
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
···
"markers": "python_version >= '3.7'",
"version": "==0.3.1"
},
+
"packaging": {
+
"hashes": [
+
"sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5",
+
"sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"
+
],
+
"markers": "python_version >= '3.7'",
+
"version": "==24.0"
+
},
"pycparser": {
"hashes": [
+
"sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6",
+
"sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"
],
+
"markers": "python_version >= '3.8'",
+
"version": "==2.22"
},
"pydantic": {
"hashes": [
+63
feedgen.py
···
···
+
#!/usr/bin/env python3
+
+
import asyncio
+
import dag_cbor
+
import redis
+
import sys
+
import websockets
+
from atproto import CAR
+
from io import BytesIO
+
+
from feeds import Manager
+
from feeds.rapidfire import RapidFireFeed
+
+
async def firehose_events():
    """Yield one message dict per repo operation seen on the relay firehose.

    Connects to the ``com.atproto.sync.subscribeRepos`` websocket, resuming
    from the sequence number stored in Redis under
    ``dev.edavis.feedgen.seq`` when that key is set.  Frames that are not
    ``#commit`` messages, and commits flagged ``tooBig``, are skipped.

    Each yielded dict contains the commit payload without its ``blocks`` and
    ``ops`` entries, with ``commit`` replaced by its ``encode('base32')``
    form, plus an ``op`` entry holding a copy of one operation.  When the
    operation has a non-null ``cid``, the copy's ``cid`` is base32-encoded
    and the matching block from the commit's CAR data is attached as
    ``op['record']``.

    A fresh dict is yielded for every operation, so consumers may hold on to
    a message without it being rewritten by the next iteration.
    """
    redis_cnx = redis.Redis()
    relay_url = 'wss://bsky.network/xrpc/com.atproto.sync.subscribeRepos'
    firehose_seq = redis_cnx.get('dev.edavis.feedgen.seq')
    if firehose_seq:
        relay_url += f'?cursor={firehose_seq.decode()}'

    sys.stdout.write(f'opening websocket connection to {relay_url}\n')
    sys.stdout.flush()

    # NOTE(review): ping_timeout=None presumably keeps a slow pong from
    # closing the stream -- confirm against the websockets documentation.
    async with websockets.connect(relay_url, ping_timeout=None) as firehose:
        op_count = 0
        while True:
            # A frame is two concatenated DAG-CBOR values: header, then body.
            frame = BytesIO(await firehose.recv())
            header = dag_cbor.decode(frame, allow_concat=True)
            if header['op'] != 1 or header.get('t') != '#commit':
                continue

            payload = dag_cbor.decode(frame)
            # .get() so a payload without the flag is treated as "not too big".
            if payload.get('tooBig'):
                continue

            car_parsed = CAR.from_bytes(payload.pop('blocks'))

            # Fields shared by every message produced from this commit.
            shared = {key: value for key, value in payload.items() if key != 'ops'}
            shared['commit'] = shared['commit'].encode('base32')

            for op in payload['ops']:
                repo_op = op.copy()
                if op['cid'] is not None:
                    repo_op['cid'] = repo_op['cid'].encode('base32')
                    repo_op['record'] = car_parsed.blocks[repo_op['cid']]
                # Build a new dict per op instead of mutating a shared one.
                yield {**shared, 'op': repo_op}

            # Persist the resume cursor once every 500 processed commits.
            op_count += 1
            if op_count % 500 == 0:
                redis_cnx.set('dev.edavis.feedgen.seq', payload['seq'])
+
+
async def main():
    """Register the feeds and hand them every message from the firehose."""
    feeds = Manager()
    feeds.register(RapidFireFeed)

    events = firehose_events()
    async for event in events:
        feeds.process(event)
+
+
+
if __name__ == '__main__':
+
asyncio.run(main())
+18
feeds/__init__.py
···
···
+
class Manager:
    """Keeps the registered feeds and routes commits and requests to them."""

    def __init__(self):
        # Feed instances in registration order; walked by process().
        self.feeds = []
        # The same instances keyed by the feed class's FEED_URI; used by serve().
        self.webs = {}

    def register(self, feed):
        """Instantiate ``feed`` with no arguments and start tracking it."""
        instance = feed()
        self.webs[feed.FEED_URI] = instance
        self.feeds.append(instance)

    def process(self, commit):
        """Forward ``commit`` to each registered feed in registration order."""
        for registered in self.feeds:
            registered.process(commit)

    def serve(self, feed_uri, limit, offset):
        """Return ``serve(limit, offset)`` of the feed registered under
        ``feed_uri``, or ``None`` when no such feed is registered."""
        target = self.webs.get(feed_uri)
        if target is None:
            return None
        return target.serve(limit, offset)
+60
feeds/rapidfire.py
···
···
+
import sqlite3
+
+
# Longest post text, in characters, that the feed accepts.
MAX_TEXT_LENGTH = 140


class RapidFireFeed:
    """Feed of short, printable-ASCII posts with no reply, embed or facets.

    Accepted posts are stored in the SQLite database ``db/rapidfire.db``.
    Rows older than one hour are pruned, and the pending work committed,
    on every tenth accepted post.
    """

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/rapidfire'

    def __init__(self):
        """Open the database and create the ``posts`` table and index if absent."""
        # Counts accepted posts; drives the prune/commit cadence in process().
        self.checkpoint = 0
        self.db_cnx = sqlite3.connect('db/rapidfire.db')
        with self.db_cnx:
            self.db_cnx.executescript("""
            pragma journal_mode = WAL;
            pragma synchronous = off;
            create table if not exists posts (uri text, create_ts timestamp);
            create index if not exists create_ts_idx on posts(create_ts);
            """)

    def process(self, commit):
        """Store the post described by ``commit`` if it passes the feed filter.

        ``commit`` is a message dict with ``repo``, ``time`` and ``op`` keys,
        where ``op`` carries ``action``, ``path`` and (for creates) ``record``.
        Only ``create`` operations in the ``app.bsky.feed.post`` collection
        are considered.  A post is accepted when its text is at most
        ``MAX_TEXT_LENGTH`` characters, consists solely of characters in the
        range 0x20..0x7e, and the record has no ``reply``, ``embed`` or
        ``facets``.  Records that are missing or carry no string ``text`` are
        skipped rather than raising, so one malformed record cannot stop the
        consumer.
        """
        op = commit['op']
        if op['action'] != 'create':
            return

        # partition() copes with paths that do not contain exactly one '/'.
        collection, _, _ = op['path'].partition('/')
        if collection != 'app.bsky.feed.post':
            return

        record = op.get('record')
        if record is None:
            return

        text = record.get('text')
        if not isinstance(text, str):
            return

        if len(text) > MAX_TEXT_LENGTH:
            return
        if not all(0x20 <= ord(c) <= 0x7e for c in text):
            return
        if any(record.get(key) is not None for key in ('reply', 'embed', 'facets')):
            return

        repo = commit['repo']
        path = op['path']
        post_uri = f'at://{repo}/{path}'
        self.db_cnx.execute(
            'insert into posts (uri, create_ts) values (:uri, :ts)',
            dict(uri=post_uri, ts=commit['time'])
        )

        # Every tenth accepted post: drop rows older than an hour and commit.
        self.checkpoint += 1
        if self.checkpoint % 10 == 0:
            self.db_cnx.execute("delete from posts where strftime('%s', create_ts) < strftime('%s', 'now', '-1 hour')")
            self.db_cnx.commit()

    def serve(self, limit, offset):
        """Return one page of the feed, newest first.

        Selects up to ``limit`` post URIs ordered by ``create_ts`` descending,
        starting ``offset`` rows in.  The result is a dict with ``feed`` (a
        list of ``{'post': uri}`` dicts) and the position of the next page as
        a string under both ``cursor`` and ``offset``.
        """
        cur = self.db_cnx.execute(
            "select uri from posts order by create_ts desc limit :limit offset :offset",
            dict(limit=limit, offset=offset)
        )

        feed = [dict(post=uri) for (uri,) in cur]
        next_position = str(offset + len(feed))

        # getFeedSkeleton responses name the pagination field 'cursor';
        # 'offset' is kept so existing readers of that key keep working.
        return dict(cursor=next_position, offset=next_position, feed=feed)
+42
feedweb.py
···
···
+
#!/usr/bin/env python3
+
+
from feeds import Manager
+
from feeds.rapidfire import RapidFireFeed
+
+
from flask import Flask, request
+
app = Flask(__name__)
+
+
@app.route('/.well-known/did.json')
def well_known_did():
    """Return the DID document for ``did:web:feedgen.edavis.dev``.

    The document lists a single ``BskyFeedGenerator`` service whose endpoint
    is ``https://feedgen.edavis.dev``.
    """
    return {
        '@context': ['https://www.w3.org/ns/did/v1'],
        'id': 'did:web:feedgen.edavis.dev',
        'service': [
            {
                'id': '#bsky_fg',
                'type': 'BskyFeedGenerator',
                'serviceEndpoint': 'https://feedgen.edavis.dev',
            },
        ],
    }
+
+
@app.route('/xrpc/app.bsky.feed.getFeedSkeleton')
def get_feed_skeleton():
    """Serve one page of the feed named by the ``feed`` query parameter.

    Query parameters:
        feed:   feed URI to serve (required).
        limit:  page size; defaults to 50 when absent or not an integer and
                is clamped to the range 1..100.
        cursor: row offset to start from; defaults to 0 when absent or not
                an integer, and negative values are treated as 0.

    Returns whatever the feed's ``serve`` produces, or a 400 response with
    an ``UnknownFeed`` error body when no feed is registered under ``feed``.
    """
    manager = Manager()
    manager.register(RapidFireFeed)

    try:
        limit = int(request.args.get('limit', 50))
    except ValueError:
        limit = 50
    # Keep the page size within 1..100: the value ends up in a SQL LIMIT,
    # where a negative number would mean "no limit at all".
    limit = max(1, min(limit, 100))

    try:
        offset = int(request.args.get('cursor', 0))
    except ValueError:
        offset = 0
    # A negative offset would make the next-page position go backwards.
    offset = max(offset, 0)

    feed_uri = request.args['feed']
    skeleton = manager.serve(feed_uri, limit, offset)
    if skeleton is None:
        # serve() yields None for unregistered feeds; returning that from a
        # view makes Flask respond 500, so answer with an explicit error.
        return {'error': 'UnknownFeed', 'message': f'unknown feed: {feed_uri}'}, 400
    return skeleton
+
+
+
if __name__ == '__main__':
+
app.run(debug=True)
+16
service/feedgen.service
···
···
+
[Unit]
+
Description=Bsky Feedgen
+
After=network.target syslog.target
+
+
[Service]
+
Type=simple
+
User=eric
+
WorkingDirectory=/home/eric/bsky-tools
+
ExecStart=/home/eric/.local/bin/pipenv run ./feedgen.py
+
TimeoutSec=15
+
Restart=on-failure
+
RestartSec=1
+
StandardOutput=journal
+
+
[Install]
+
WantedBy=multi-user.target
+16
service/feedweb.service
···
···
+
[Unit]
+
Description=Bsky Feedweb
+
After=network.target syslog.target
+
+
[Service]
+
Type=simple
+
User=eric
+
WorkingDirectory=/home/eric/bsky-tools
+
ExecStart=/home/eric/.local/bin/pipenv run gunicorn -w 4 -b 127.0.0.1:9060 feedweb:app
+
TimeoutSec=15
+
Restart=on-failure
+
RestartSec=1
+
StandardOutput=journal
+
+
[Install]
+
WantedBy=multi-user.target