this repo has no description

feat: clear out the Python feeds

These will be (slowly) migrated into edavis/bsky-feeds

-53
feed_manager.py
···
-
from fnmatch import fnmatchcase
-
-
from feeds.battle import BattleFeed
-
from feeds.rapidfire import RapidFireFeed
-
from feeds.homeruns import HomeRunsTeamFeed
-
from feeds.norazone_interesting import NoraZoneInteresting
-
from feeds.sevendirtywords import SevenDirtyWordsFeed
-
from feeds.ratio import RatioFeed
-
from feeds.outlinetags import OutlineTagsFeed
-
from feeds.popqp import PopularQuotePostsFeed
-
-
class FeedManager:
    """Registry of feed instances, keyed by their FEED_URI pattern.

    A pattern may contain fnmatch wildcards (e.g. ``.../team:*``); wildcard
    feeds receive the concrete request URI so they can pick a variant.
    """

    def __init__(self):
        # pattern -> instantiated feed object
        self.feeds = {}

    def register(self, feed):
        """Instantiate *feed* (a class) and file it under its FEED_URI."""
        self.feeds[feed.FEED_URI] = feed()

    def process_commit(self, commit):
        """Fan one firehose commit out to every registered feed."""
        for instance in self.feeds.values():
            instance.process_commit(commit)

    def serve_feed(self, feed_uri, limit, offset, langs, debug=False):
        """Dispatch a getFeedSkeleton request to the first matching feed.

        Raises a generic Exception when no registered pattern matches.
        """
        match = None
        for pattern, instance in self.feeds.items():
            if fnmatchcase(feed_uri, pattern):
                match = (pattern, instance)
                break
        if match is None:
            raise Exception('no matching feed pattern found')

        pattern, instance = match
        if '*' in pattern:
            # wildcard feeds need the concrete URI to select a variant
            if debug:
                return instance.serve_wildcard_feed_debug(feed_uri, limit, offset, langs)
            return instance.serve_wildcard_feed(feed_uri, limit, offset, langs)
        if debug:
            return instance.serve_feed_debug(limit, offset, langs)
        return instance.serve_feed(limit, offset, langs)

    def commit_changes(self):
        """Ask every feed to flush its pending database writes."""
        for instance in self.feeds.values():
            instance.commit_changes()

    def stop_all(self):
        """Stop feeds that run a background DB worker; others are skipped."""
        for instance in self.feeds.values():
            try:
                instance.stop_db_worker()
            except AttributeError:
                # this feed has no worker thread -- nothing to stop
                pass


feed_manager = FeedManager()
-76
feedgen.py
···
-
#!/usr/bin/env python3
-
-
import asyncio
-
from io import BytesIO
-
import json
-
import logging
-
import signal
-
-
from atproto import CAR
-
import dag_cbor
-
import websockets
-
-
from feed_manager import feed_manager
-
-
# Root logger stays at WARNING; our own namespaces log at DEBUG.
logging.basicConfig(
    format='%(asctime)s - %(levelname)-5s - %(name)-20s - %(message)s',
    level=logging.DEBUG
)
logging.getLogger('').setLevel(logging.WARNING)
for _name in ('feeds', 'firehose', 'feedgen'):
    logging.getLogger(_name).setLevel(logging.DEBUG)

logger = logging.getLogger('feedgen')
-
-
async def firehose_events():
    """Yield decoded JSON commit events from the local relay, forever.

    Each websocket frame is assumed to be one complete JSON document
    (the relay at port 6008 emits pre-decoded commits, not raw CAR frames).
    """
    relay_url = 'ws://localhost:6008/subscribe'

    log = logging.getLogger('feeds.events')
    log.info(f'opening websocket connection to {relay_url}')

    async with websockets.connect(relay_url, ping_timeout=60) as firehose:
        while True:
            frame = BytesIO(await firehose.recv())
            yield json.load(frame)
-
-
async def main():
    """Consume the firehose, flushing feed databases every 2500 events."""
    seen = 0

    async for commit in firehose_events():
        feed_manager.process_commit(commit)
        seen += 1
        # batch commits to amortize SQLite transaction overhead
        if seen % 2500 == 0:
            feed_manager.commit_changes()
-
-
def handle_exception(loop, context):
    """Event-loop exception handler: log the failure, then schedule shutdown.

    Per asyncio convention the *context* dict carries either an 'exception'
    object or a plain 'message' string.
    """
    msg = context.get("exception", context["message"])
    logger.error(f"Caught exception: {msg}")
    logger.info("Shutting down...")
    asyncio.create_task(shutdown(loop))
-
-
async def shutdown(loop, signal=None):
    """Stop all feeds, cancel outstanding tasks, and stop the loop.

    *signal* is the optional OS signal that triggered the shutdown.
    """
    if signal:
        logger.info(f'received exit signal {signal.name}')
    feed_manager.stop_all()

    pending = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
    for task in pending:
        task.cancel()
    logger.info(f'cancelling {len(pending)} outstanding tasks')
    # gather with return_exceptions so CancelledError doesn't propagate here
    await asyncio.gather(*pending, return_exceptions=True)
    loop.stop()
-
-
if __name__ == '__main__':
    # NOTE(review): get_event_loop() is deprecated for this use since 3.10 --
    # consider asyncio.new_event_loop(); kept as-is for behavior parity.
    loop = asyncio.get_event_loop()

    # translate SIGTERM/SIGINT into a graceful shutdown() task;
    # s=sig binds the loop variable at definition time
    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(
            sig,
            lambda s=sig: asyncio.create_task(shutdown(loop, signal=s))
        )
    loop.set_exception_handler(handle_exception)

    try:
        loop.create_task(main())
        loop.run_forever()
    finally:
        loop.close()
-65
feeds/__init__.py
···
-
from datetime import datetime, timezone, timedelta
-
-
class BaseFeed:
    """Common base class for firehose-backed feeds.

    Subclasses override the abstract hooks (process_commit, serve_feed /
    serve_wildcard_feed, commit_changes) and may reuse the timestamp and
    SQLite transaction helpers below.
    """

    def process_commit(self, commit):
        """Consume one firehose commit; must be overridden."""
        raise NotImplementedError

    def serve_feed(self, limit, offset, langs):
        """Return post URIs for a non-wildcard feed; must be overridden."""
        raise NotImplementedError

    def serve_wildcard_feed(self, feed_uri, limit, offset, langs):
        """Return post URIs for a wildcard feed; *feed_uri* picks the variant."""
        raise NotImplementedError

    def commit_changes(self):
        """Flush pending database writes; must be overridden."""
        raise NotImplementedError

    def parse_timestamp(self, timestamp):
        """Parse an atproto datetime string into an aware datetime.

        Formats are tried in order; when none matches, falls back to the
        current UTC time rather than raising.
        https://atproto.com/specs/lexicon#datetime
        """
        # example string -> strptime format, most common first
        formats = {
            # preferred
            '1985-04-12T23:20:50.123Z': '%Y-%m-%dT%H:%M:%S.%f%z',
            # '1985-04-12T23:20:50.123456Z': '%Y-%m-%dT%H:%M:%S.%f%z',
            # '1985-04-12T23:20:50.120Z': '%Y-%m-%dT%H:%M:%S.%f%z',
            # '1985-04-12T23:20:50.120000Z': '%Y-%m-%dT%H:%M:%S.%f%z',

            # supported
            # '1985-04-12T23:20:50.12345678912345Z': '',
            '1985-04-12T23:20:50Z': '%Y-%m-%dT%H:%M:%S%z',
            # '1985-04-12T23:20:50.0Z': '%Y-%m-%dT%H:%M:%S.%f%z',
            # '1985-04-12T23:20:50.123+00:00': '%Y-%m-%dT%H:%M:%S.%f%z',
            # '1985-04-12T23:20:50.123-07:00': '%Y-%m-%dT%H:%M:%S.%f%z',
        }

        # renamed loop variable: the original shadowed the builtin format()
        for fmt in formats.values():
            try:
                return datetime.strptime(timestamp, fmt)
            except ValueError:
                continue

        return datetime.now(timezone.utc)

    def safe_timestamp(self, timestamp):
        """Parse *timestamp*, clamping junk and far-future values to now.

        Returns the parsed datetime when it is sane (after the epoch and no
        more than ~2 minutes in the future); otherwise the current UTC time.
        """
        utc_now = datetime.now(timezone.utc)
        if timestamp is None:
            return utc_now

        parsed = self.parse_timestamp(timestamp)
        if parsed.timestamp() <= 0:
            # epoch-or-earlier values are junk
            return utc_now
        if parsed - timedelta(minutes=2) < utc_now:
            # accept, allowing small clock skew into the future
            return parsed
        # >= 2 minutes in the future: distrust the client clock
        return utc_now

    def transaction_begin(self, db):
        """Open an explicit transaction unless one is already active."""
        if not db.in_transaction:
            db.execute('BEGIN')

    def transaction_commit(self, db):
        """Commit the active transaction, if any."""
        if db.in_transaction:
            db.execute('COMMIT')

    def wal_checkpoint(self, db, mode='PASSIVE'):
        """Run a WAL checkpoint in the given SQLite mode."""
        return db.execute(f'PRAGMA wal_checkpoint({mode})')
-100
feeds/battle.py
···
-
import logging
-
-
import apsw
-
import apsw.ext
-
import grapheme
-
-
from . import BaseFeed
-
-
class BattleFeed(BaseFeed):
    """Ranks posts by grapheme length, shortest first.

    The unique (grapheme_length, lang) index keeps exactly one post per
    length/language pair; on conflict the newest post wins.
    """

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/battle'

    def __init__(self):
        self.db_cnx = apsw.Connection('db/battle.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (
                uri text,
                grapheme_length integer,
                create_ts timestamp,
                lang text
            );
            create unique index if not exists ll_idx on posts(grapheme_length, lang);
            """)

        self.logger = logging.getLogger('feeds.battle')

    def process_commit(self, commit):
        """Record the grapheme length of each newly created post."""
        if commit['opType'] != 'c':
            return
        if commit['collection'] != 'app.bsky.feed.post':
            return

        record = commit.get('record')
        if record is None:
            return

        post_uri = 'at://{}/app.bsky.feed.post/{}'.format(commit['did'], commit['rkey'])
        length = grapheme.length(record.get('text', ''))
        ts = self.safe_timestamp(record.get('createdAt')).timestamp()

        self.transaction_begin(self.db_cnx)

        # a post may declare several languages; index it under each one
        for lang in record.get('langs') or ['']:
            self.db_cnx.execute("""
                insert into posts(uri, grapheme_length, create_ts, lang)
                values(:uri, :length, :ts, :lang)
                on conflict do update set uri = :uri, create_ts = :ts
            """, dict(uri=post_uri, length=length, ts=ts, lang=lang))

    def commit_changes(self):
        self.logger.debug('committing changes')
        self.transaction_commit(self.db_cnx)
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def serve_feed(self, limit, offset, langs):
        """Shortest posts first, optionally filtered to requested languages."""
        if '*' in langs:
            rows = self.db_cnx.execute("""
                select uri
                from posts
                order by grapheme_length asc
                limit :limit offset :offset
            """, dict(limit=limit, offset=offset))
            return [row[0] for row in rows]

        wanted = list(langs.values())
        unioned = ' union '.join(
            ['select uri, grapheme_length from posts where lang = ?'] * len(wanted)
        )
        rows = self.db_cnx.execute(
            unioned + ' order by grapheme_length asc limit ? offset ?',
            [*wanted, limit, offset]
        )
        return [row[0] for row in rows]

    def serve_feed_debug(self, limit, offset, langs):
        """serve_feed variant rendering every column as a formatted table."""
        if '*' in langs:
            query = """
                select *, unixepoch('now') - create_ts as age_seconds
                from posts
                order by grapheme_length asc
                limit :limit offset :offset
            """
            bindings = [limit, offset]
        else:
            wanted = list(langs.values())
            unioned = ' union '.join(
                ["select *, unixepoch('now') - create_ts as age_seconds from posts where lang = ?"] * len(wanted)
            )
            query = unioned + ' order by grapheme_length asc limit ? offset ?'
            bindings = [*wanted, limit, offset]

        return apsw.ext.format_query_table(
            self.db_cnx, query, bindings,
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-107
feeds/homeruns.py
···
-
import logging
-
-
import apsw
-
import apsw.ext
-
-
from . import BaseFeed
-
-
MLBHRS_DID = 'did:plc:pnksqegntq5t3o7pusp2idx3'

# feed rkey suffix ('team:ABBR') -> post tag used by the @mlbhrs bot
TEAM_ABBR_LOOKUP = {
    "OAK": "OaklandAthletics",
    "PIT": "PittsburghPirates",
    "SDN": "SanDiegoPadres",
    "SEA": "SeattleMariners",
    "SFN": "SanFranciscoGiants",
    "SLN": "StLouisCardinals",
    "TBA": "TampaBayRays",
    "TEX": "TexasRangers",
    "TOR": "TorontoBlueJays",
    "MIN": "MinnesotaTwins",
    "PHI": "PhiladelphiaPhillies",
    "ATL": "AtlantaBraves",
    "CHA": "ChicagoWhiteSox",
    "MIA": "MiamiMarlins",
    "NYA": "NewYorkYankees",
    "MIL": "MilwaukeeBrewers",
    "LAA": "LosAngelesAngels",
    "ARI": "ArizonaDiamondbacks",
    "BAL": "BaltimoreOrioles",
    "BOS": "BostonRedSox",
    "CHN": "ChicagoCubs",
    "CIN": "CincinnatiReds",
    "CLE": "ClevelandGuardians",
    "COL": "ColoradoRockies",
    "DET": "DetroitTigers",
    "HOU": "HoustonAstros",
    "KCA": "KansasCityRoyals",
    "LAN": "LosAngelesDodgers",
    "WAS": "WashingtonNationals",
    "NYN": "NewYorkMets",
}


class HomeRunsTeamFeed(BaseFeed):
    """Per-team MLB home run feeds, fed by posts from the @mlbhrs bot."""

    FEED_URI = 'at://did:plc:pnksqegntq5t3o7pusp2idx3/app.bsky.feed.generator/team:*'
    # shared by serve_wildcard_feed and its debug variant
    SERVE_FEED_QUERY = """
        select uri
        from posts
        where tag = :tag
        order by uri desc
        limit :limit offset :offset
    """

    def __init__(self):
        self.db_cnx = apsw.Connection('db/homeruns.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (uri text, tag text);
            create index if not exists tag_idx on posts(tag);
            """)

        self.logger = logging.getLogger('feeds.homeruns')

    def process_commit(self, commit):
        """Index new posts from the MLBHRS bot under each of their tags."""
        if commit['did'] != MLBHRS_DID:
            return
        if commit['opType'] != 'c':
            return
        if commit['collection'] != 'app.bsky.feed.post':
            return

        record = commit.get('record')
        if record is None:
            return

        uri = 'at://{repo}/app.bsky.feed.post/{rkey}'.format(
            repo=commit['did'],
            rkey=commit['rkey']
        )
        tags = record.get('tags', [])

        self.logger.debug(f'adding {uri!r} under {tags!r}')

        with self.db_cnx:
            for tag in tags:
                self.db_cnx.execute(
                    "insert into posts (uri, tag) values (:uri, :tag)",
                    dict(uri=uri, tag=tag)
                )

    def commit_changes(self):
        self.logger.debug('committing changes')
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def _team_tag(self, feed_uri):
        """Map '.../team:SEA' to its post tag; None when the abbr is unknown."""
        prefix, sep, team_abbr = feed_uri.rpartition(':')
        return TEAM_ABBR_LOOKUP.get(team_abbr)

    def serve_wildcard_feed(self, feed_uri, limit, offset, langs):
        """Serve one team's feed; unknown abbreviations yield an empty feed.

        Fix: the original did TEAM_ABBR_LOOKUP[team_abbr] and raised KeyError
        (a 500 in the web layer) for any unrecognized suffix.
        """
        team_tag = self._team_tag(feed_uri)
        if team_tag is None:
            return []

        cur = self.db_cnx.execute(
            self.SERVE_FEED_QUERY,
            dict(tag=team_tag, limit=limit, offset=offset)
        )
        return [uri for (uri,) in cur]

    def serve_wildcard_feed_debug(self, feed_uri, limit, offset, langs):
        """Formatted-table debug view (the original was an empty stub that
        returned None into the Flask handler)."""
        team_tag = self._team_tag(feed_uri)
        if team_tag is None:
            return 'unknown team abbreviation in {!r}'.format(feed_uri)
        return apsw.ext.format_query_table(
            self.db_cnx, self.SERVE_FEED_QUERY,
            dict(tag=team_tag, limit=limit, offset=offset),
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-35
feeds/norazone_interesting.py
···
-
import logging
-
-
from atproto import Client, models
-
import apsw
-
import apsw.ext
-
-
from . import BaseFeed
-
-
# https://bsky.app/profile/nora.zone/post/3kv35hqi4a22b
TARGET_QUOTE_URI = 'at://did:plc:4qqizocrnriintskkh6trnzv/app.bsky.feed.post/3kv35hqi4a22b'


class NoraZoneInteresting(BaseFeed):
    """Serves quote posts of one specific post, fetched live from the AppView.

    Unlike the other feeds this one keeps no local state: it proxies
    app.bsky.feed.getQuotes and passes the AppView cursor straight through.
    """

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/nz-interesting'

    def __init__(self):
        self.client = Client('https://public.api.bsky.app')

    def process_commit(self, commit):
        # stateless: nothing to learn from the firehose
        pass

    def commit_changes(self):
        # stateless: nothing to flush
        pass

    def serve_feed(self, limit, cursor, langs):
        """Return {'cursor': ..., 'feed': [...]} straight from getQuotes.

        Note: *cursor* here is the AppView's opaque cursor, not an integer
        offset like the other feeds.
        """
        response = self.client.app.bsky.feed.get_quotes(
            models.AppBskyFeedGetQuotes.Params(
                uri=TARGET_QUOTE_URI,
                limit=limit,
                cursor=cursor,
            )
        )
        return {
            'cursor': response.cursor,
            'feed': [dict(post=post.uri) for post in response.posts],
        }
-68
feeds/outlinetags.py
···
-
import logging
-
-
import apsw
-
import apsw.ext
-
-
from . import BaseFeed
-
-
class OutlineTagsFeed(BaseFeed):
    """Reverse-chronological feed of posts carrying outline (post-level) tags."""

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/outline'
    SERVE_FEED_QUERY = """
        select uri, create_ts
        from posts
        order by create_ts desc
        limit :limit offset :offset
    """

    def __init__(self):
        self.db_cnx = apsw.Connection('db/outlinetags.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (uri text, create_ts timestamp);
            create unique index if not exists create_ts_idx on posts(create_ts);
            """)

        self.logger = logging.getLogger('feeds.outlinetags')

    def process_commit(self, commit):
        """Store newly created posts that carry at least one outline tag."""
        if commit['opType'] != 'c':
            return
        if commit['collection'] != 'app.bsky.feed.post':
            return

        record = commit.get('record')
        if record is None:
            return
        if not record.get('tags', []):
            # only tagged posts belong in this feed
            return

        post_uri = 'at://{}/app.bsky.feed.post/{}'.format(commit['did'], commit['rkey'])
        ts = self.safe_timestamp(record.get('createdAt')).timestamp()
        self.transaction_begin(self.db_cnx)
        self.db_cnx.execute(
            'insert into posts (uri, create_ts) values (:uri, :ts)',
            dict(uri=post_uri, ts=ts)
        )

    def commit_changes(self):
        self.logger.debug('committing changes')
        self.transaction_commit(self.db_cnx)
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def serve_feed(self, limit, offset, langs):
        rows = self.db_cnx.execute(self.SERVE_FEED_QUERY, dict(limit=limit, offset=offset))
        return [uri for (uri, _ts) in rows]

    def serve_feed_debug(self, limit, offset, langs):
        return apsw.ext.format_query_table(
            self.db_cnx, self.SERVE_FEED_QUERY, dict(limit=limit, offset=offset),
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-90
feeds/popqp.py
···
-
import logging
-
-
import apsw
-
import apsw.ext
-
-
from . import BaseFeed
-
-
class PopularQuotePostsFeed(BaseFeed):
    """Quote-posted posts ranked by quote count with a 3-hour exponential decay."""

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/popqp'
    SERVE_FEED_QUERY = """
        select uri, create_ts, update_ts, quote_count, exp( -1 * ( ( unixepoch('now') - create_ts ) / 10800.0 ) ) as decay,
            quote_count * exp( -1 * ( ( unixepoch('now') - create_ts ) / 10800.0 ) ) as score
        from posts
        order by quote_count * exp( -1 * ( ( unixepoch('now') - create_ts ) / 10800.0 ) ) desc
        limit :limit offset :offset
    """
    # prune once a post's decayed score can no longer chart
    DELETE_OLD_POSTS_QUERY = """
        delete from posts where
            quote_count * exp( -1 * ( ( unixepoch('now') - create_ts ) / 10800.0 ) ) < 1.0
            and create_ts < unixepoch('now', '-24 hours')
    """

    def __init__(self):
        self.db_cnx = apsw.Connection('db/popqp.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (
                uri text, create_ts timestamp, update_ts timestamp, quote_count int
            );
            create unique index if not exists uri_idx on posts(uri);
            """)

        self.logger = logging.getLogger('feeds.popqp')

    def process_commit(self, commit):
        """Count each new quote post against the post it quotes."""
        if commit['opType'] != 'c':
            return
        if commit['collection'] != 'app.bsky.feed.post':
            return

        record = commit.get('record')
        if record is None:
            return

        embed = record.get('embed')
        if embed is None:
            return

        embed_type = embed.get('$type')
        # fix: guard the nested lookups -- malformed embeds used to raise
        # KeyError/TypeError here (RatioFeed already guards the same lookups)
        try:
            if embed_type == 'app.bsky.embed.record':
                quote_post_uri = embed['record']['uri']
            elif embed_type == 'app.bsky.embed.recordWithMedia':
                quote_post_uri = embed['record']['record']['uri']
            else:
                return
        except (KeyError, TypeError):
            return

        ts = self.safe_timestamp(record.get('createdAt')).timestamp()
        self.transaction_begin(self.db_cnx)

        self.db_cnx.execute("""
            insert into posts (uri, create_ts, update_ts, quote_count)
            values (:uri, :ts, :ts, 1)
            on conflict (uri) do
            update set quote_count = quote_count + 1, update_ts = :ts
        """, dict(uri=quote_post_uri, ts=ts))

    def delete_old_posts(self):
        """Prune stale, low-scoring posts."""
        self.db_cnx.execute(self.DELETE_OLD_POSTS_QUERY)
        self.logger.debug('deleted {} old posts'.format(self.db_cnx.changes()))

    def commit_changes(self):
        self.delete_old_posts()
        self.logger.debug('committing changes')
        self.transaction_commit(self.db_cnx)
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def serve_feed(self, limit, offset, langs):
        cur = self.db_cnx.execute(self.SERVE_FEED_QUERY, dict(limit=limit, offset=offset))
        return [row[0] for row in cur]

    def serve_feed_debug(self, limit, offset, langs):
        bindings = dict(limit=limit, offset=offset)
        return apsw.ext.format_query_table(
            self.db_cnx, self.SERVE_FEED_QUERY, bindings,
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-98
feeds/rapidfire.py
···
-
import logging
-
-
import apsw
-
import apsw.ext
-
import grapheme
-
-
from . import BaseFeed
-
-
MAX_TEXT_LENGTH = 140


class RapidFireFeed(BaseFeed):
    """Short (<= 140 grapheme), bare, top-level posts from the last 15 minutes."""

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/rapidfire'

    def __init__(self):
        self.db_cnx = apsw.Connection('db/rapidfire.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (uri text, create_ts timestamp, lang text);
            create index if not exists create_ts_idx on posts(create_ts);
            """)

        self.logger = logging.getLogger('feeds.rapidfire')

    def process_commit(self, commit):
        """Store qualifying new posts: short text, no reply/embed/facets."""
        if commit['opType'] != 'c':
            return
        if commit['collection'] != 'app.bsky.feed.post':
            return

        record = commit.get('record')
        if record is None:
            return

        # guard clauses replacing the original all([...]) check
        if grapheme.length(record.get('text', '')) > MAX_TEXT_LENGTH:
            return
        if record.get('reply') is not None:
            return
        if record.get('embed') is not None:
            return
        if record.get('facets') is not None:
            return

        post_uri = 'at://{}/app.bsky.feed.post/{}'.format(commit['did'], commit['rkey'])
        ts = self.safe_timestamp(record.get('createdAt')).timestamp()

        self.transaction_begin(self.db_cnx)

        # one row per declared language
        for lang in record.get('langs') or ['']:
            self.db_cnx.execute(
                'insert into posts (uri, create_ts, lang) values (:uri, :ts, :lang)',
                dict(uri=post_uri, ts=ts, lang=lang)
            )

    def delete_old_posts(self):
        """Trim anything older than 15 minutes."""
        self.db_cnx.execute(
            "delete from posts where create_ts < unixepoch('now', '-15 minutes')"
        )
        self.logger.debug('deleted {} old posts'.format(self.db_cnx.changes()))

    def commit_changes(self):
        self.delete_old_posts()
        self.logger.debug('committing changes')
        self.transaction_commit(self.db_cnx)
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def serve_feed(self, limit, offset, langs):
        """Newest first, optionally filtered to requested languages."""
        if '*' in langs:
            rows = self.db_cnx.execute(
                "select uri from posts order by create_ts desc limit :limit offset :offset",
                dict(limit=limit, offset=offset)
            )
            return [row[0] for row in rows]

        wanted = list(langs.values())
        unioned = ' union '.join(
            ['select uri, create_ts from posts where lang = ?'] * len(wanted)
        )
        rows = self.db_cnx.execute(
            unioned + ' order by create_ts desc limit ? offset ?',
            [*wanted, limit, offset]
        )
        return [row[0] for row in rows]

    def serve_feed_debug(self, limit, offset, langs):
        query = """
            select *, unixepoch('now') - create_ts as age_seconds
            from posts
            order by create_ts desc
            limit :limit offset :offset
        """
        return apsw.ext.format_query_table(
            self.db_cnx, query, dict(limit=limit, offset=offset),
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-144
feeds/ratio.py
···
-
import logging
-
-
import apsw
-
import apsw.ext
-
-
from . import BaseFeed
-
-
class RatioFeed(BaseFeed):
    """Surfaces heavily 'ratioed' posts: many replies/quotes vs few likes/reposts.

    Score = (replies + quoteposts) / (likes + reposts + 1), decayed over
    ~16 hours; only posts with > 15 replies and ratio > 2.5 chart.
    """

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/ratio'
    SERVE_FEED_QUERY = """
        with served as (
            select
                uri,
                create_ts,
                ( unixepoch('now') - create_ts ) as age_seconds,
                replies,
                quoteposts,
                likes,
                reposts,
                ( replies + quoteposts ) / ( likes + reposts + 1 ) as ratio,
                exp( -1 * ( ( unixepoch('now') - create_ts ) / ( 3600.0 * 16 ) ) ) as decay
            from posts
        )
        select
            *,
            ( ratio * decay ) as score
        from served
        where replies > 15 and ratio > 2.5
        order by score desc
        limit :limit offset :offset
    """
    DELETE_OLD_POSTS_QUERY = """
        delete from posts
        where
            create_ts < unixepoch('now', '-5 days')
    """

    def __init__(self):
        self.db_cnx = apsw.Connection('db/ratio.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (
                uri text, create_ts timestamp,
                replies float, likes float, reposts float, quoteposts float
            );
            create unique index if not exists uri_idx on posts(uri);
            """)

        self.logger = logging.getLogger('feeds.ratio')

    def process_commit(self, commit):
        """Tally replies/likes/reposts/quoteposts against the subject post."""
        if commit['opType'] != 'c':
            return

        record = commit.get('record')
        if record is None:
            # fix: the original called record.get(...) without this guard and
            # raised AttributeError on commits with a missing record
            return

        subject_uri = None
        is_reply = False
        is_quotepost = False

        if commit['collection'] in {'app.bsky.feed.like', 'app.bsky.feed.repost'}:
            ts = self.safe_timestamp(record.get('createdAt')).timestamp()
            try:
                subject_uri = record['subject']['uri']
            except KeyError:
                return
        elif commit['collection'] == 'app.bsky.feed.post':
            ts = self.safe_timestamp(record.get('createdAt')).timestamp()
            if record.get('reply') is not None:
                is_reply = True
                try:
                    subject_uri = record['reply']['parent']['uri']
                except KeyError:
                    return

                # only count non-OP replies
                if subject_uri.startswith('at://' + commit['did']):
                    return

            elif record.get('embed') is not None:
                is_quotepost = True
                t = record['embed']['$type']
                if t == 'app.bsky.embed.record':
                    try:
                        subject_uri = record['embed']['record']['uri']
                    except KeyError:
                        return
                elif t == 'app.bsky.embed.recordWithMedia':
                    try:
                        subject_uri = record['embed']['record']['record']['uri']
                    except KeyError:
                        return

        if subject_uri is None:
            return

        params = {
            'uri': subject_uri,
            'ts': ts,
            'is_reply': int(is_reply),
            'is_like': int(commit['collection'] == 'app.bsky.feed.like'),
            'is_repost': int(commit['collection'] == 'app.bsky.feed.repost'),
            'is_quotepost': int(is_quotepost),
        }

        self.transaction_begin(self.db_cnx)

        # upsert: first event creates the row, later events bump one counter
        self.db_cnx.execute("""
            insert into posts(uri, create_ts, replies, likes, reposts, quoteposts)
            values (:uri, :ts,
                case when :is_reply then 1 else 0 end,
                case when :is_like then 1 else 0 end,
                case when :is_repost then 1 else 0 end,
                case when :is_quotepost then 1 else 0 end)
            on conflict(uri)
            do update set
                replies = replies + case when :is_reply then 1 else 0 end,
                likes = likes + case when :is_like then 1 else 0 end,
                reposts = reposts + case when :is_repost then 1 else 0 end,
                quoteposts = quoteposts + case when :is_quotepost then 1 else 0 end
        """, params)

    def delete_old_posts(self):
        """Prune posts older than 5 days."""
        self.db_cnx.execute(self.DELETE_OLD_POSTS_QUERY)

    def commit_changes(self):
        self.logger.debug('committing changes')
        self.delete_old_posts()
        self.transaction_commit(self.db_cnx)
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def serve_feed(self, limit, offset, langs):
        cur = self.db_cnx.execute(self.SERVE_FEED_QUERY, dict(limit=limit, offset=offset))
        return [row[0] for row in cur]

    def serve_feed_debug(self, limit, offset, langs):
        bindings = dict(limit=limit, offset=offset)
        return apsw.ext.format_query_table(
            self.db_cnx, self.SERVE_FEED_QUERY, bindings,
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-80
feeds/sevendirtywords.py
···
-
import logging
-
import re
-
-
import apsw
-
import apsw.ext
-
-
from . import BaseFeed
-
-
# https://en.wikipedia.org/wiki/Seven_dirty_words
SDW_REGEX = re.compile(r'^(shit|piss|fuck|cunt|cocksucker|motherfucker|tits)[!,./;?~ ]*$', re.I|re.A)


class SevenDirtyWordsFeed(BaseFeed):
    """Posts whose entire text is one of Carlin's seven dirty words."""

    FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/sdw'

    def __init__(self):
        self.db_cnx = apsw.Connection('db/sdw.db')
        self.db_cnx.pragma('journal_mode', 'WAL')
        self.db_cnx.pragma('wal_autocheckpoint', '0')

        with self.db_cnx:
            self.db_cnx.execute("""
            create table if not exists posts (uri text, create_ts timestamp);
            create unique index if not exists create_ts_idx on posts(create_ts);
            """)

        self.logger = logging.getLogger('feeds.sdw')

    def process_commit(self, commit):
        """Store bare, top-level posts whose text matches the word list."""
        if commit['opType'] != 'c':
            return
        if commit['collection'] != 'app.bsky.feed.post':
            return

        record = commit.get('record')
        if record is None:
            return

        # guard clauses replacing the original conds-list + all() check
        if record.get('reply') is not None:
            return
        if record.get('embed') is not None:
            return
        if record.get('facets') is not None:
            return
        if SDW_REGEX.search(record.get('text', '')) is None:
            return

        post_uri = 'at://{}/app.bsky.feed.post/{}'.format(commit['did'], commit['rkey'])
        ts = self.safe_timestamp(record.get('createdAt')).timestamp()
        self.transaction_begin(self.db_cnx)
        self.db_cnx.execute(
            'insert into posts (uri, create_ts) values (:uri, :ts)',
            dict(uri=post_uri, ts=ts)
        )

    def commit_changes(self):
        self.logger.debug('committing changes')
        self.transaction_commit(self.db_cnx)
        self.wal_checkpoint(self.db_cnx, 'RESTART')

    def serve_feed(self, limit, offset, langs):
        rows = self.db_cnx.execute("""
            select uri
            from posts
            order by create_ts desc
            limit :limit
            offset :offset
        """, dict(limit=limit, offset=offset))
        return [row[0] for row in rows]

    def serve_feed_debug(self, limit, offset, langs):
        query = "select * from posts order by create_ts desc limit :limit offset :offset"
        return apsw.ext.format_query_table(
            self.db_cnx, query, dict(limit=limit, offset=offset),
            string_sanitize=2, text_width=9999, use_unicode=True
        )
-65
feedweb.py
···
-
#!/usr/bin/env python3
-
-
from flask import Flask, request, jsonify
-
from prometheus_client import Counter, make_wsgi_app
-
from werkzeug.middleware.dispatcher import DispatcherMiddleware
-
from werkzeug.datastructures import LanguageAccept
-
-
from feed_manager import feed_manager
-
-
feed_requests = Counter('feed_requests', 'requests by feed URI', ['feed'])
-
-
app = Flask(__name__)
-
-
@app.route('/xrpc/app.bsky.feed.getFeedSkeleton')
def get_feed_skeleton():
    """Serve a feed skeleton: resolve limit, cursor, langs, then dispatch.

    The cursor is an integer offset for most feeds; the nz-interesting feed
    passes the AppView's opaque cursor string straight through.
    """
    try:
        limit = int(request.args.get('limit', 50))
    except ValueError:
        limit = 50

    if 'nz-interesting' in request.args['feed']:
        # opaque upstream cursor: pass through untouched
        offset = request.args.get('cursor')
    else:
        try:
            offset = int(request.args.get('cursor', 0))
        except ValueError:
            offset = 0

    feed_uri = request.args['feed']
    if feed_uri.endswith('-dev'):
        # fix: strip only the trailing '-dev' marker -- the original
        # feed_uri.replace('-dev', '') also clobbered '-dev' mid-URI
        feed_uri = feed_uri[:-len('-dev')]
    else:
        # only count real (non-dev) feed requests in metrics
        (prefix, sep, rkey) = feed_uri.rpartition('/')
        feed_requests.labels(rkey).inc()

    # explicit ?langs= overrides the Accept-Language header
    if request.args.getlist('langs'):
        req_langs = request.args.getlist('langs')
        langs = LanguageAccept([(l, 1) for l in req_langs])
    else:
        langs = request.accept_languages

    if request.args.get('debug', '0') == '1':
        headers = {'Content-Type': 'text/plain; charset=utf-8'}
        debug = feed_manager.serve_feed(feed_uri, limit, offset, langs, debug=True)
        return debug, headers

    posts = feed_manager.serve_feed(feed_uri, limit, offset, langs, debug=False)
    if isinstance(posts, dict):
        # the feed handled pagination itself (cursor + feed already in shape)
        return posts

    if len(posts) < limit:
        # short page: end of the feed, no cursor
        return dict(feed=[dict(post=uri) for uri in posts])
    else:
        offset += len(posts)
        return dict(cursor=str(offset), feed=[dict(post=uri) for uri in posts])
-
-
# expose Prometheus metrics alongside the app
app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
    '/metrics': make_wsgi_app()
})

if __name__ == '__main__':
    # local development only: also serve the did:web document and run the
    # Flask debug server (production runs behind a WSGI server instead)
    from feedweb_utils import did_doc
    app.add_url_rule('/.well-known/did.json', view_func=did_doc)

    app.run(debug=True)
-14
feedweb_utils.py
···
-
NGROK_HOSTNAME = 'routinely-right-barnacle.ngrok-free.app'


def did_doc():
    """Return the did:web document advertising this host as a feed generator."""
    endpoint = f'https://{NGROK_HOSTNAME}'
    return {
        '@context': ['https://www.w3.org/ns/did/v1'],
        'id': f'did:web:{NGROK_HOSTNAME}',
        'service': [
            {
                'id': '#bsky_fg',
                'type': 'BskyFeedGenerator',
                'serviceEndpoint': endpoint,
            },
        ],
    }