+21
.github/workflows/build.yml
+run: cargo build --bin reflector --release && mv target/release/reflector target/release/reflector_amd64
+1
-1
.github/workflows/checks.yml
-run: cargo fmt --package links --package constellation --package ufos --package spacedust --package who-am-i --package slingshot -- --check
+run: cargo fmt --package links --package constellation --package ufos --package spacedust --package who-am-i --package slingshot --package pocket -- --check
+737
-237
Cargo.lock
-source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
-"atrium-common 0.1.2 (git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits)",
-"atrium-xrpc 0.12.3 (git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits)",
+source = "git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace#80a355991ac9b48ba3f559d12aac74f071fc638c"
+"atrium-common 0.1.2 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-xrpc 0.12.3 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
···
-source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
+source = "git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace#80a355991ac9b48ba3f559d12aac74f071fc638c"
···
+source = "git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace#80a355991ac9b48ba3f559d12aac74f071fc638c"
+"atrium-api 0.25.4 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-common 0.1.2 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-xrpc 0.12.3 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
···
+source = "git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace#80a355991ac9b48ba3f559d12aac74f071fc638c"
+"atrium-api 0.25.4 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-common 0.1.2 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-identity 0.1.5 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-xrpc 0.12.3 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
···
-source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
+source = "git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace#80a355991ac9b48ba3f559d12aac74f071fc638c"
···
+source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d"
···
+"atrium-api 0.25.4 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
···
+source = "git+https://github.com/fatfingers23/jwt-compact.git#aed088b8ff5ad44ef2785c453f6a4b7916728b1c"
···
+"atrium-api 0.25.4 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-common 0.1.2 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-identity 0.1.5 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+"atrium-oauth 0.1.3 (git+https://github.com/uniphil/atrium.git?branch=fix%2Fresolve-handle-https-accept-whitespace)",
+3
Cargo.toml
+8
-1
Makefile
-cargo fmt --package links --package constellation --package ufos --package spacedust --package who-am-i --package slingshot
+1
-1
constellation/Cargo.toml
+661
constellation/LICENSE
+3
constellation/LICENSE.future
+9
constellation/readme.md
+Constellation's source code is currently available exclusively under the AGPL license (see [LICENSE](./LICENSE)).
+In the future, its code MAY become available under the MIT and/or Apache-2.0 licenses, at the sole discretion of the microcosm organization. Contributing implies acceptance of this possible future licensing change. The change has not happened yet and is not guaranteed.
+57
-14
constellation/src/bin/main.rs
/// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorthand value:
···
.set_bucket_count(NonZero::new(10).unwrap()) // count * duration = 5 mins. stuff doesn't happen that fast here.
+239
-238
constellation/src/bin/rocks-link-stats.rs
-struct SourceLink(Collection, RPath, LinkType, Option<Collection>); // last is target collection, if it's an at-uri link with a collection
+// struct SourceLink(Collection, RPath, LinkType, Option<Collection>); // last is target collection, if it's an at-uri link with a collection
-// b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b12, b16, b32, b64, b128, b256, b512, b1024, b4096, b16384, b65535, b262144, bmax
+// xxx// b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b12, b16, b32, b64, b128, b256, b512, b1024, b4096, b16384, b65535, b262144, bmax
-// 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 32, 64, 128, 256, 512, 1024, 4096, 16384, 65535, 262144, 1048576
+// xxx// 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 32, 64, 128, 256, 512, 1024, 4096, 16384, 65535, 262144, 1048576
+2
constellation/src/bin/rocks-restore-from-backup.rs
+4
constellation/src/server/filters.rs
+332
-19
constellation/src/server/mod.rs
pub async fn serve<S, A>(store: S, addr: A, stay_alive: CancellationToken) -> anyhow::Result<()>
···
help: "open this URL in a web browser (or request with Accept: text/html) for information about this API.",
+93
-1
constellation/src/storage/mem_store.rs
+484
-14
constellation/src/storage/mod.rs
/// estimate of how many accounts we've seen create links. the _subjects_ of any links are not represented here.
···
/// for LSM stores, deleted links don't decrement this, and updated records with any links will likely increment it.
+361
-41
constellation/src/storage/rocks_store.rs
pub db: Arc<DBWithThreadMode<MultiThreaded>>, // TODO: move seqs here (concat merge op will be fun)
···
-impl<Orig: Clone, IdVal: IdTableValue, const WITH_REVERSE: bool> IdTable<Orig, IdVal, WITH_REVERSE>
···
+let current_max = grouped_counts.keys().next_back().unwrap(); // limit should be non-zero bleh
+eprintln!("failed to look up did_value from did_id {did_id:?}: {did:?}: data consistency bug?");
+54
constellation/templates/get-backlinks.html.j2
+{% block description %}All {{ query.source }} records with links to {{ query.subject }}{% endblock %}
+<small style="font-weight: normal; font-size: 1rem"><a href="{{ browseable_uri }}">browse record</a></small>
+<li>See distinct linking DIDs at <code>/links/distinct-dids</code>: <a href="/links/distinct-dids?target={{ query.subject|urlencode }}&collection={{ collection|urlencode }}&path={{ path|urlencode }}">/links/distinct-dids?target={{ query.subject }}&collection={{ collection }}&path={{ path }}</a></li>
+<li>See all links to this target at <code>/links/all</code>: <a href="/links/all?target={{ query.subject|urlencode }}">/links/all?target={{ query.subject }}</a></li>
+<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>)
+-> <a href="https://pdsls.dev/at://{{ record.did().0 }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre>
+67
constellation/templates/get-many-to-many-counts.html.j2
+{% block description %}Counts of many-to-many {{ query.source }} join records with links to {{ query.subject }} and a secondary target at {{ query.path_to_other }}{% endblock %}
+Many-to-many links to <code>{{ query.subject }}</code> joining through <code>{{ query.path_to_other }}</code>
+<small style="font-weight: normal; font-size: 1rem"><a href="{{ browseable_uri }}">browse record</a></small>
+<p><strong>{% if cursor.is_some() || query.cursor.is_some() %}more than {% endif %}{{ counts_by_other_subject.len()|to_u64|human_number }} joins</strong> <code>{{ query.source }}→{{ query.path_to_other }}</code></p>
+<li>See direct backlinks at <code>/xrpc/blue.microcosm.links.getBacklinks</code>: <a href="/xrpc/blue.microcosm.links.getBacklinks?subject={{ query.subject|urlencode }}&source={{ query.source|urlencode }}">/xrpc/blue.microcosm.links.getBacklinks?subject={{ query.subject }}&source={{ query.source }}</a></li>
+<li>See all links to this target at <code>/links/all</code>: <a href="/links/all?target={{ query.subject|urlencode }}">/links/all?target={{ query.subject }}</a></li>
+<pre style="display: block; margin: 1em 2em" class="code"><strong>Joined subject</strong>: {{ counts.subject }}
+65
-7
constellation/templates/hello.html.j2
<p>It works by recursively walking <em>all</em> records coming through the firehose, searching for anything that looks like a link. Links are indexed by the target they point at, the collection the record came from, and the JSON path to the link in that record.</p>
-This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">{{ days_indexed|human_number }}</span> days.<br/>
+This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">
-<p>The API is currently <strong>unstable</strong>. But feel free to use it! If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
+<p>You're welcome to use this public instance! Please do not build the torment nexus. If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
+<li><p><code>subject</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
+<li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li>
+<li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
+<li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
+{% call try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16) %}
+<li><p><code>subject</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
+<li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li>
+<li><p><code>pathToOther</code>: required. Path to the secondary link in the many-to-many record. Example: <code>otherThing.uri</code></p></li>
+<li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
+<li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple subjects. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
+<li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
+<p>[DEPRECATED]: use <code>GET /xrpc/blue.microcosm.links.getBacklinks</code>. New apps should avoid it, but this endpoint <strong>will</strong> remain supported for the foreseeable future.</p>
-<li><code>target</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></li>
+<li><p><code>target</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
+<li><p><code>path</code>: required, must url-encode. Example: <code>.subject.uri</code></p></li>
+<li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
+<li><p><code>from_dids</code> [deprecated]: optional. Use <code>did</code> instead. Example: <code>from_dids=did:plc:vc7f4oafdgxsihk4cry2xpze,did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
+<li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
-{% call try_it::links("at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r", "app.bsky.feed.like", ".subject.uri") %}
+{% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16) %}
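Aside: the parameter lists above map one-to-one onto a plain HTTP query string. A minimal sketch of calling the `/links` endpoint on the public instance, assuming the `reqwest` (blocking + json features) and `serde_json` crates; the record coordinates are taken from the examples above:

```rust
// Sketch: query constellation's /links endpoint as documented above.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let links: serde_json::Value = reqwest::blocking::Client::new()
        .get("https://constellation.microcosm.blue/links")
        .query(&[
            ("target", "at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r"),
            ("collection", "app.bsky.feed.like"),
            ("path", ".subject.uri"),
            ("limit", "16"),
        ])
        // being nice, per the docs: identify your project in the user-agent
        .header(reqwest::header::USER_AGENT, "my-project (@me.example.com)")
        .send()?
        .error_for_status()?
        .json()?;
    println!("{links:#}");
    Ok(())
}
```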
+2
-2
constellation/templates/links.html.j2
<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>)
--> <a href="https://atproto-browser-plus-links.vercel.app/at/{{ record.did().0|urlencode }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre>
+-> <a href="https://pdsls.dev/at://{{ record.did().0 }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre>
+88
-3
constellation/templates/try-it-macros.html.j2
+?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." />
+&source= <input type="text" name="source" value="{{ source }}" placeholder="app.bsky.feed.like:subject.uri" />
+&did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
+&limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre>
+?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." />
+&source= <input type="text" name="source" value="{{ source }}" placeholder="app.bsky.feed.like:subject.uri" />
+&pathToOther= <input type="text" name="pathToOther" value="{{ pathToOther }}" placeholder="otherThing.uri" />
+&did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
+<span id="m2m-subject-placeholder"></span> <button id="m2m-add-subject">+ other subject filter</button>
+&otherSubject= <input type="text" name="otherSubject" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." />{% endif %}{% endfor %}
+&limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre>
&collection= <input type="text" name="collection" value="{{ collection }}" placeholder="collection" />
-&path= <input type="text" name="path" value="{{ path }}" placeholder="path" /> <button type="submit">get links</button></pre>
+&did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
+&limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre>
-496
cozy-setup (move to another repo).md
-well... the gateway fell over IMMEDIATELY with like 2 req/sec from deletions, with that ^^ config. for now i removed everything except the reverse proxy config + normal caddy metrics and it's running fine on vanilla caddy. i did try reducing the rate-limiting configs to a single, fixed-key global limit but it still ate all the ram and died. maybe badger w/ the cache config was still a problem. maybe it would have been ok on a machine with more than 1GB mem.
-- nginx. i should probably just use this. acme-client is a piece of cake to set up, and i know how to configure it.
-- haproxy. also kind of familiar, it's old and stable. no idea how it handle low-mem (our 1gb) vs nginx.
-- rpxy. like caddy (auto-tls) but in rust and actually fast? has an "experimental" cache feature. but the cache feature looks good.
-- rama. build-your-own proxy. not sure that it has both cache and limiter in their standard features?
-- pingora. build-your-own cloudflare, so like, probably stable. has tools for cache and limiting. low-mem...?
-- cache stuff in pingora seems a little... hit and miss (byeeeee). only a test impl for Storage for the main cache feature?
-- but the rate-limiter has a guide: https://github.com/cloudflare/pingora/blob/main/docs/user_guide/rate_limiter.md
-what i want is low-resource reverse proxy with built-in rate-limiting and caching. but maybe cache (and/or ratelimiting) could be external to the reverse proxy
-- varnish is a dedicated cache. has https://github.com/varnish/varnish-modules/blob/master/src/vmod_vsthrottle.vcc
-curl -LO https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.109.1/victoria-metrics-linux-amd64-v1.109.1.tar.gz
-sudo mkdir /etc/victoria-metrics && sudo chown -R victoriametrics:victoriametrics /etc/victoria-metrics
-ExecStart=/usr/local/bin/victoria-metrics-prod -storageDataPath=/var/lib/victoria-metrics -retentionPeriod=90d -selfScrapeInterval=1m -promscrape.config=/etc/victoria-metrics/prometheus.yml
-followed `https://grafana.com/docs/grafana/latest/setup-grafana/installation/debian/#install-grafana-on-debian-or-ubuntu`
-sudo grafana-cli --pluginUrl https://github.com/VictoriaMetrics/victoriametrics-datasource/releases/download/v0.11.1/victoriametrics-datasource-v0.11.1.zip plugins install victoriametrics
-curl -LO https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-armv7.tar.gz
-proxy_cache_lock on; # make simlutaneous requests for the same uri wait for it to appear in cache instead of hitting origin
-add_header Access-Control-Allow-Origin * always; # bit of hack to have it here but nginx doesn't like it in the `if`
-https://github.com/nginx/nginx-prometheus-exporter/releases/download/v1.4.1/nginx-prometheus-exporter_1.4.1_linux_amd64.tar.gz
-curl -LO https://github.com/nginx/nginx-prometheus-exporter/releases/download/v1.4.1/nginx-prometheus-exporter_1.4.1_linux_amd64.tar.gz
-# ExecStart=/usr/local/bin/nginx-prometheus-exporter --nginx.scrape-uri=http://gateway:8080/stub_status --web.listen-address=gateway:9113
-- mount sd card, touch `ssh` file echo `echo "pi:$(echo raspberry | openssl passwd -6 -stdin)" > userconf.txt`
-- `RUST_BACKTRACE=full cargo run --bin rocks-restore-from-backup --release -- --from-backup-dir "/home/pi/backup/constellation-index" --to-data-dir /mnt/constellation-index`
-- `RUST_BACKTRACE=full cargo run --release -- --backend rocks --data /mnt/constellation-index/ --jetstream us-east-2 --backup /home/pi/backup/constellation-index --backup-interval 6 --max-old-backups 20`
-- stop backups from running on the older instance! `RUST_BACKTRACE=full cargo run --release -- --backend rocks --data /mnt/links-2.rocks/ --jetstream us-east-1`
-ExecStart=/home/pi/links/target/release/main --backend rocks --data /mnt/constellation-index/ --jetstream us-east-2 --backup /home/pi/backup/constellation-index --backup-interval 6 --max-old-backups 20
-- todo: overlayfs? would need to figure out builds/updates still, also i guess logs are currently written to sd? (oof)
--> worked, but reverted for now: tailscale on raspi was consuming ~50% cpu for the jetstream traffic. this might be near its max since it would have been catching up at the time (max jetstream throughput) but it feels a bit too much. we have to trust the jetstream server and link_aggregator doesn't (yet) make any other external connections, so for now the raspi connects directly from my home again.
-server <tailnet ip>:6789; # bootes; ip so that we don't race on reboot with tailscale coming up, which nginx doesn't like
-proxy_cache_lock on; # make simlutaneous requests for the same uri wait for it to appear in cache instead of hitting origin
-ssl_certificate /etc/letsencrypt/live/constellation.microcosm.blue/fullchain.pem; # managed by Certbot
-ssl_certificate_key /etc/letsencrypt/live/constellation.microcosm.blue/privkey.pem; # managed by Certbot
-re-reading about `nodelay`, i should probably remove it -- nginx would then queue requests to upstream, but still service them at the configured limit. it's fine for my internet since the global limit isn't nodelay, but probably less "fair" to clients if there's contention around the global limit (earlier requests would get all of theirs serviced before later ones can get in the queue)
-curl -LO https://github.com/martin-helmich/prometheus-nginxlog-exporter/releases/download/v1.11.0/prometheus-nginxlog-exporter_1.11.0_linux_amd64.deb
-log_format constellation_format "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\"";
+1
-1
jetstream/Cargo.toml
-atrium-api = { git = "https://github.com/uniphil/atrium", branch = "fix/nsid-allow-nonleading-name-digits", default-features = false, features = [
+atrium-api = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace", default-features = false, features = [
+496
legacy/cozy-setup (move to another repo).md
+well... the gateway fell over IMMEDIATELY with like 2 req/sec from deletions, with that ^^ config. for now i removed everything except the reverse proxy config + normal caddy metrics and it's running fine on vanilla caddy. i did try reducing the rate-limiting configs to a single, fixed-key global limit but it still ate all the ram and died. maybe badger w/ the cache config was still a problem. maybe it would have been ok on a machine with more than 1GB mem.
+- nginx. i should probably just use this. acme-client is a piece of cake to set up, and i know how to configure it.
+- haproxy. also kind of familiar, it's old and stable. no idea how it handle low-mem (our 1gb) vs nginx.
+- rpxy. like caddy (auto-tls) but in rust and actually fast? has an "experimental" cache feature. but the cache feature looks good.
+- rama. build-your-own proxy. not sure that it has both cache and limiter in their standard features?
+- pingora. build-your-own cloudflare, so like, probably stable. has tools for cache and limiting. low-mem...?
+- cache stuff in pingora seems a little... hit and miss (byeeeee). only a test impl for Storage for the main cache feature?
+- but the rate-limiter has a guide: https://github.com/cloudflare/pingora/blob/main/docs/user_guide/rate_limiter.md
+what i want is low-resource reverse proxy with built-in rate-limiting and caching. but maybe cache (and/or ratelimiting) could be external to the reverse proxy
+- varnish is a dedicated cache. has https://github.com/varnish/varnish-modules/blob/master/src/vmod_vsthrottle.vcc
+curl -LO https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.109.1/victoria-metrics-linux-amd64-v1.109.1.tar.gz
+sudo mkdir /etc/victoria-metrics && sudo chown -R victoriametrics:victoriametrics /etc/victoria-metrics
+ExecStart=/usr/local/bin/victoria-metrics-prod -storageDataPath=/var/lib/victoria-metrics -retentionPeriod=90d -selfScrapeInterval=1m -promscrape.config=/etc/victoria-metrics/prometheus.yml
+followed `https://grafana.com/docs/grafana/latest/setup-grafana/installation/debian/#install-grafana-on-debian-or-ubuntu`
+sudo grafana-cli --pluginUrl https://github.com/VictoriaMetrics/victoriametrics-datasource/releases/download/v0.11.1/victoriametrics-datasource-v0.11.1.zip plugins install victoriametrics
+curl -LO https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-armv7.tar.gz
+proxy_cache_lock on; # make simlutaneous requests for the same uri wait for it to appear in cache instead of hitting origin
+add_header Access-Control-Allow-Origin * always; # bit of hack to have it here but nginx doesn't like it in the `if`
+https://github.com/nginx/nginx-prometheus-exporter/releases/download/v1.4.1/nginx-prometheus-exporter_1.4.1_linux_amd64.tar.gz
+curl -LO https://github.com/nginx/nginx-prometheus-exporter/releases/download/v1.4.1/nginx-prometheus-exporter_1.4.1_linux_amd64.tar.gz
+# ExecStart=/usr/local/bin/nginx-prometheus-exporter --nginx.scrape-uri=http://gateway:8080/stub_status --web.listen-address=gateway:9113
+- mount sd card, touch `ssh` file echo `echo "pi:$(echo raspberry | openssl passwd -6 -stdin)" > userconf.txt`
+- `RUST_BACKTRACE=full cargo run --bin rocks-restore-from-backup --release -- --from-backup-dir "/home/pi/backup/constellation-index" --to-data-dir /mnt/constellation-index`
+- `RUST_BACKTRACE=full cargo run --release -- --backend rocks --data /mnt/constellation-index/ --jetstream us-east-2 --backup /home/pi/backup/constellation-index --backup-interval 6 --max-old-backups 20`
+- stop backups from running on the older instance! `RUST_BACKTRACE=full cargo run --release -- --backend rocks --data /mnt/links-2.rocks/ --jetstream us-east-1`
+ExecStart=/home/pi/links/target/release/main --backend rocks --data /mnt/constellation-index/ --jetstream us-east-2 --backup /home/pi/backup/constellation-index --backup-interval 6 --max-old-backups 20
+- todo: overlayfs? would need to figure out builds/updates still, also i guess logs are currently written to sd? (oof)
+-> worked, but reverted for now: tailscale on raspi was consuming ~50% cpu for the jetstream traffic. this might be near its max since it would have been catching up at the time (max jetstream throughput) but it feels a bit too much. we have to trust the jetstream server and link_aggregator doesn't (yet) make any other external connections, so for now the raspi connects directly from my home again.
+server <tailnet ip>:6789; # bootes; ip so that we don't race on reboot with tailscale coming up, which nginx doesn't like
+proxy_cache_lock on; # make simlutaneous requests for the same uri wait for it to appear in cache instead of hitting origin
+ssl_certificate /etc/letsencrypt/live/constellation.microcosm.blue/fullchain.pem; # managed by Certbot
+ssl_certificate_key /etc/letsencrypt/live/constellation.microcosm.blue/privkey.pem; # managed by Certbot
+re-reading about `nodelay`, i should probably remove it -- nginx would then queue requests to upstream, but still service them at the configured limit. it's fine for my internet since the global limit isn't nodelay, but probably less "fair" to clients if there's contention around the global limit (earlier requests would get all of theirs serviced before later ones can get in the queue)
+curl -LO https://github.com/martin-helmich/prometheus-nginxlog-exporter/releases/download/v1.11.0/prometheus-nginxlog-exporter_1.11.0_linux_amd64.deb
+log_format constellation_format "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\"";
+35
legacy/old-readme-details.md
+- Self hostable: handles the full write throughput of the global atproto firehose on a raspberry pi 4b + single SSD
+- Storage efficient: less than 2GB/day disk consumption indexing all references in all lexicons and all non-atproto URLs
+- Handles record deletion, account de/re-activation, and account deletion, ensuring accurate link counts and respecting users' data choices
+All social interactions in atproto tend to be represented by links (or references) between PDS records. This index can answer questions like "how many likes does a bsky post have", "who follows an account", "what are all the comments on a [frontpage](https://frontpage.fyi/) post", and more.
+- **status**: works! api is unstable and likely to change, and no known instances have a full network backfill yet.
+_note: the public instance currently runs on a little raspberry pi in my house, feel free to use it! it comes with only best-effort uptime, no commitment to not breaking the api for now, and possible rate-limiting. if you want to be nice you can put your project name and bsky username (or email) in your user-agent header for api requests._
+using the same "link source" concept as [constellation](./constellation/), offer webhook notifications for new references created to records
+A rust crate (not published on crates.io yet) for optimistically parsing links out of arbitrary atproto PDS records, and potentially canonicalizing them
+123
legacy/original-notes.md
+as far as i can tell, atproto lexicons today don't follow much of a convention for referencing across documents: sometimes it's a StrongRef, sometimes it's a DID, sometimes it's a bare at-uri. lexicon authors choose any old link-sounding name for the key in their document.
+it's pretty messy so embrace the mess: atproto wants to be part of the web, so this library will also extract URLs and other URIs if you want it to. all the links.
+the atproto firehose that bluesky sprays at you will contain raw _contents_ from peoples' pdses. these are isolated, decontextualized updates. it's very easy to build some kinds of interesting downstream apps off of this feed.
+but bringing almost any kind of _context_ into your project requires a big step up in complexity and potentially cost: you're entering "appview" territory. _how many likes does a post have? who follows this account?_
+you own your atproto data: it's kept in your personal data repository (PDS) and no one else can write to it. when someone likes your post, they create a "like" record in their _own_ pds, and that like belongs to _them_, not to you/your post.
+in the firehose you'll see an `app.bsky.feed.post` record created, with no details about who has liked it. then you'll see separate `app.bsky.feed.like` records show up for each like that comes in on that post, with no context about the post except a random-looking reference to it. storing these so you can answer those questions later is up to you!
+everything is links, and they're a mess, but they all kinda work the same, so maybe some tooling can bring down that big step in complexity from firehose raw-content apps -> apps requiring any social context.
+i think that making these low-level services as easy to use as jetstream could open up pathways for building more atproto apps that operate at full scale with interesting features for reasonable effort at low cost to operate.
+- med-level: pass a &str of record in json form and get a list of parsed links + json paths back. (todo: should also handle dag-cbor prob?)
+- high-ish level: pass the json record and maybe apply some pre-loaded rules based on known lexicons to get the best result.
+for now, a link is only considered if it matches for the entire value of the record's field -- links embedded in text content are not included. note that urls in bluesky posts _will_ still be extracted, since they are broken out into facets.
+every at-uri has at least two equivalent forms, one with a `DID`, and one with an account handle. the at-uri spec [illustrates this by example](https://atproto.com/specs/at-uri-scheme):
+some applications, like a reverse link index, may wish to canonicalize at-uris to a single form. the `DID`-form is stable as an account changes its handle and probably the right choice to canonicalize to, but maybe some apps would actually prefer to canonicalise to handles?
+links might be relative, in which case they might need to be made absolute before being useful. is that a concern for this library, or up to the user? (seems like we might not have enough context here to determine the absolute form)
+- using `tinyjson` because it's nice -- maybe should switch to serde_json to share deps with atrium?
+- would use atrium for parsing at-uris, but it's not in there. there's a did-only version in the non-lib commands.rs. its identifier parser is strict to did + handle, which makes sense, but for our purposes we might want to allow unknown methods too?
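The canonicalization question in those notes can at least be sketched at the string level; `resolve_handle` below is a hypothetical stand-in for real handle resolution (DNS TXT or HTTPS well-known), not part of the crate:

```rust
// Sketch of DID-form canonicalization: keep DID authorities as-is, swap
// handle authorities for their resolved DID. `resolve_handle` is hypothetical.
fn canonicalize_at_uri(uri: &str, resolve_handle: impl Fn(&str) -> Option<String>) -> Option<String> {
    let rest = uri.strip_prefix("at://")?;
    let (authority, path) = match rest.split_once('/') {
        Some((a, p)) => (a, Some(p)),
        None => (rest, None),
    };
    let did = if authority.starts_with("did:") {
        authority.to_string() // already DID-form: nothing to do
    } else {
        resolve_handle(authority)? // handle-form: substitute the resolved DID
    };
    Some(match path {
        Some(p) => format!("at://{did}/{p}"),
        None => format!("at://{did}"),
    })
}
```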
+196
legacy/ufos ops (move to micro-ops).md
+0 1/6 * * * rsync -ahP --delete "/mnt/ufos-db/.snapshots/$(sudo snapper --csvout -c ufos-db list --columns number | tail -n1)/snapshot/" backup/ufos/
+format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $upstream_cache_status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
+format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $upstream_cache_status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
+format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $upstream_cache_status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
+1
pocket/.gitignore
+19
pocket/Cargo.toml
+jwt-compact = { git = "https://github.com/fatfingers23/jwt-compact.git", features = ["es256k"] }
+17
pocket/api-description.md
+This API leverages atproto service proxying to offer a bit of per-user, per-app non-public data storage.
+Perfect for things like application preferences that might be better left out of the public PDS data.
+The intent is to use oauth scopes to isolate storage on a per-application basis, and to allow easy data migration from a community-hosted instance to your own if you end up needing that.
+> Pocket is currently in a **v0, pre-release state**. There is one production instance and you can use it! Expect short downtimes for restarts as development progresses and occasional data loss until it's stable.
+ATProto might end up adding a similar feature to [PDSs](https://atproto.com/guides/glossary#pds-personal-data-server). If/when that happens, you should use it instead of this!
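For orientation, service proxying means the client talks to its own PDS and names the target service in the `atproto-proxy` header; the PDS authenticates the user and forwards the call with a signed service token. A hedged sketch assuming `reqwest`; the NSID `blue.microcosm.pocket.getPreferences` and the `#pocket` service fragment are hypothetical placeholders, not Pocket's actual lexicon:

```rust
// Sketch: reaching a service like Pocket via atproto service proxying.
// The request goes to the user's own PDS, which forwards it based on the
// atproto-proxy header (format: "<service did>#<service id fragment>").
fn proxied_request(pds_host: &str, access_jwt: &str) -> Result<String, Box<dyn std::error::Error>> {
    let body = reqwest::blocking::Client::new()
        .get(format!("{pds_host}/xrpc/blue.microcosm.pocket.getPreferences"))
        .header("atproto-proxy", "did:web:pocket.example.com#pocket") // hypothetical target
        .bearer_auth(access_jwt) // the PDS authenticates the user, then signs a service JWT
        .send()?
        .error_for_status()?
        .text()?;
    Ok(body)
}
```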
+7
pocket/src/lib.rs
+34
pocket/src/main.rs
+265
pocket/src/server.rs
+50
pocket/src/storage.rs
+143
pocket/src/token.rs
+// danger! unfortunately we need to decode the DID from the jwt body before we have a public key to verify the jwt with
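The comment above describes a real chicken-and-egg step when verifying self-issued service tokens: the issuer DID must be read from the unverified payload, that DID's document supplies the signing key, and only then can the signature be checked. A minimal sketch of the unverified peek, assuming the `base64` and `serde_json` crates (Pocket itself uses `jwt-compact`; this is not its actual code):

```rust
// Sketch: read the issuer DID out of a JWT *without* verifying it. The
// returned DID must never be trusted until the signature is checked against
// the key published in that DID's document.
use base64::Engine;

fn unverified_issuer_did(jwt: &str) -> Option<String> {
    let payload_b64 = jwt.split('.').nth(1)?; // header.payload.signature
    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
        .decode(payload_b64)
        .ok()?;
    let claims: serde_json::Value = serde_json::from_slice(&bytes).ok()?;
    claims.get("iss")?.as_str().map(str::to_owned)
}
```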
+67
pocket/static/index.html
+<meta name="description" content="API Documentation for Pocket, a simple user-preference storage system for atproto" />
+8
quasar/Cargo.toml
+3
quasar/readme.md
+57
-129
readme.md
+[](https://bsky.app/profile/microcosm.blue)
+[](https://discord.gg/tcDfe4PGVB)
+[](https://github.com/sponsors/uniphil/)
+[](https://ko-fi.com/bad_example)
+Welcome! Documentation is under active development. If you like reading API docs, you'll probably hit the ground running!
+Tutorials, how-to guides, and client SDK libraries are all in the works for gentler on-ramps, but are not quite ready yet. But don't let that stop you! Hop in the [microcosm discord](https://discord.gg/tcDfe4PGVB), or post questions and tag [@bad-example.com](https://bsky.app/profile/bad-example.com) on Bluesky if you get stuck anywhere.
-- Self hostable: handles the full write throughput of the global atproto firehose on a raspberry pi 4b + single SSD
-- Storage efficient: less than 2GB/day disk consumption indexing all references in all lexicons and all non-atproto URLs
-- Handles record deletion, account de/re-activation, and account deletion, ensuring accurate link counts and respecting users data choices
+> This repository's primary home is moving to tangled: [@microcosm.blue/microcosm-rs](https://tangled.sh/@microcosm.blue/microcosm-rs). It will continue to be mirrored on [github](https://github.com/at-microcosm/microcosm-rs) for the foreseeable future, and it's fine to open issues or pulls in either place!
-All social interactions in atproto tend to be represented by links (or references) between PDS records. This index can answer questions like "how many likes does a bsky post have", "who follows an account", "what are all the comments on a [frontpage](https://frontpage.fyi/) post", and more.
-- **status**: works! api is unstable and likely to change, and no known instances have a full network backfill yet.
-_note: the public instance currently runs on a little raspberry pi in my house, feel free to use it! it comes with only with best-effort uptime, no commitment to not breaking the api for now, and possible rate-limiting. if you want to be nice you can put your project name and bsky username (or email) in your user-agent header for api requests._
-using the same "link source" concept as [constellation](./constellation/), offer webhook notifications for new references created to records
-A rust crate (not published on crates.io yet) for optimistically parsing links out of arbitrary atproto PDS records, and potentially canonicalizing them
-as far as i can tell, atproto lexicons today don't follow much of a convention for referencing across documents: sometimes it's a StrongRef, sometimes it's a DID, sometimes it's a bare at-uri. lexicon authors choose any old link-sounding key name for the key in their document.
-it's pretty messy so embrace the mess: atproto wants to be part of the web, so this library will also extract URLs and other URIs if you want it to. all the links.
-the atproto firehose that bluesky sprays at you will contain raw _contents_ from peoples' pdses. these are isolated, decontextualized updates. it's very easy to build some kinds of interesting downstream apps off of this feed.
-but bringing almost kind of _context_ into your project requires a big step up in complexity and potentially cost: you're entering "appview" territory. _how many likes does a post have? who follows this account?_
-you own your atproto data: it's kept in your personal data repository (PDS) and noone else can write to it. when someone likes your post, they create a "like" record in their _own_ pds, and that like belongs to _them_, not to you/your post.
-in the firehose you'll see a `app.bsky.feed.post` record created, with no details about who has liked it. then you'll see separate `app.bsky.feed.like` records show up for each like that comes in on that post, with no context about the post except a random-looking reference to it. storing these in order to do so is up to you!
-everything is links, and they're a mess, but they all kinda work the same, so maybe some tooling can bring down that big step in complexity from firehose raw-content apps -> apps requiring any social context.
-i think that making these low-level services as easy to use as jetstream could open up pathways for building more atproto apps that operate at full scale with interesting features for reasonable effort at low cost to operate.
+A global atproto interactions backlink index as a simple JSON API. Works with every lexicon, runs on a raspberry pi, consumes less than 2GiB of disk per day. Handles record deletion, account de/re-activation, and account deletion, ensuring accurate link counts while respecting users' data choices.
+- Status: used in production. APIs will change but backwards compatibility will be maintained as long as needed.
+A global atproto interactions firehose. Extracts all at-uris, DIDs, and URLs from every lexicon in the firehose, and exposes them over a websocket modelled after [jetstream](https://github.com/bluesky-social/jetstream).
-- med-level: pass a &str of record in json form and get a list of parsed links + json paths back. (todo: should also handle dag-cbor prob?)
+- Status: v0: the basics work and the APIs are in place! missing cursor replay, forward link storage, and delete event link hydration.
-- high-ish level: pass the json record and maybe apply some pre-loaded rules based on known lexicons to get the best result.
-for now, a link is only considered if it matches for the entire value of the record's field -- links embedded in text content are not included. note that urls in bluesky posts _will_ still be extracted, since they are broken out into facets.
+- [Spacedust notifications](https://notifications.microcosm.blue/): web push notifications for _every_ atproto app
+- [Zero-Bluesky real-time interaction-updating post embed](https://bsky.bad-example.com/zero-bluesky-realtime-embed/)
+A fast, eager, production-grade edge cache for atproto records and identities. Pre-caches all records from the firehose and maintains a longer-term cache of requested records on disk.
-every at-uri has at least two equivalent forms, one with a `DID`, and one with an account handle. the at-uri spec [illustrates this by example](https://atproto.com/specs/at-uri-scheme):
-some applications, like a reverse link index, may wish to canonicalize at-uris to a single form. the `DID`-form is stable as an account changes its handle and probably the right choice to canonicalize to, but maybe some apps would actually perfer to canonicalise to handles?
+Timeseries stats and sample records for every [collection](https://atproto.com/guides/glossary#collection) ever seen in the atproto firehose. Unique users are counted in hyperloglog sketches enabling arbitrary cardinality aggregation across time buckets and/or NSIDs.
+- Status: Used in production. It has APIs and they work! Needs improvement on indexing; needs more indexes and some more APIs to the data exposed.
+> See also: [UFOs atproto explorer](https://ufos.microcosm.blue/) built on the UFOs API. ([source](https://github.com/at-microcosm/spacedust-utils))
-links might be relative, in which case they might need to be made absolute before being useful. is that a concern for this library, or up to the user? (seems like we might not have context here to determine its absolute)
+- Status: used in multiple apps in production, but not yet published to crates.io; some rework planned
-- using `tinyjson` because it's nice -- maybe should switch to serde_json to share deps with atrium?
+> See also: [Rocketman](https://github.com/teal-fm/cadet/tree/main/rocketman), another excellent rust jetstream client which shares some lineage and _is_ published on crates.io.
-- would use atrium for parsing at-uris, but it's not in there. there's a did-only version in the non-lib commands.rs. its identifier parser is strict to did + handle, which makes sense, but for our purposes we might want to allow unknown methods too?
+An identity bridge for microcosm demos, that kinda worked. Fixing its problems is about equivalent to reinventing a lot of OIDC, so it's being retired.
+> `who-am-i` is still in use for the Spacedust Notifications demo, but that will hopefully be migrated to use atproto oauth directly instead.
+12
reflector/Cargo.toml
+9
reflector/readme.md
+receiving requests from multiple subdomains is left as a problem for the reverse proxy to solve, since acme wildcard certificates (ie. letsencrypt) require the most complicated and involved challenge type (DNS).
+caddy [has good support for](https://caddyserver.com/docs/caddyfile/patterns#wildcard-certificates) configuring the wildcard DNS challenge with various DNS providers, and also supports [on-demand](https://caddyserver.com/docs/automatic-https#using-on-demand-tls) provisioning via the simpler methods.
+if you only need a small fixed number of subdomains, you can also use certbot or otherwise individually configure them in your reverse proxy.
+112
reflector/src/main.rs
+12
-2
slingshot/Cargo.toml
+atrium-api = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace", default-features = false }
+atrium-common = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace" }
+atrium-identity = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace" }
+atrium-oauth = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace" }
+93
slingshot/api-description.md
+_A [gravitational slingshot](https://en.wikipedia.org/wiki/Gravity_assist) makes use of the gravity and relative movements of celestial bodies to accelerate a spacecraft and change its trajectory._
+Applications in [ATProtocol](https://atproto.com/) store data in users' own [PDS](https://atproto.com/guides/self-hosting) (Personal Data Server), which are distributed across thousands of independently-run servers all over the world. Trying to access this data poses challenges for client applications:
+Large projects like [Bluesky](https://bsky.app/) control their performance and reliability by syncing all app-relevant data from PDSs into first-party databases. But for new apps, building out this additional data infrastructure adds significant effort and complexity up front.
+**Slingshot is a fast, eager, production-grade cache of data in the [ATmosphere](https://atproto.com/)**, offering performance and reliability without custom infrastructure.
+> Slingshot is currently in a **v0, pre-release state**. There is one production instance and you can use it! Expect short downtimes for restarts as development progresses and lower cache hit-rates as the internal storage caches are adjusted and reset.
+The core APIs will not change, since they are standard third-party `com.atproto` query APIs from ATProtocol.
+Slingshot subscribes to the global [Firehose](https://atproto.com/specs/sync#firehose) of data updates. It keeps a short-term rolling indexed window of *all* data, and automatically promotes content likely to be requested to its longer-term main cache. _(automatic promotion is still a work in progress)_
+When there is a cache miss, Slingshot can often still accelerate record fetching, since it keeps a large cache of resolved identities: it can usually request from the correct PDS without extra lookups.
+The firehose includes **update** and **delete** events, which Slingshot uses to ensure stale and deleted data is removed within a very short window. Additionally, identity and account-level events can trigger rapid cleanup of data for deactivated and deleted accounts. _(some of this is still a work in progress)_
+The "AT" in ATProtocol [stands for _Authenticated Transfer_](https://atproto.com/guides/glossary#at-protocol): all data is cryptographically signed, which makes it possible to broadcast data through third parties and trust that it's real _without_ having to directly contact the originating server.
+Two core standard query APIs are supported to balance convenience and trust. They both fetch [records](https://atproto.com/guides/glossary#record):
+### [`com.atproto.repo.getRecord`](#tag/comatproto-queries/get/xrpc/com.atproto.repo.getRecord)
+### [`com.atproto.sync.getRecord`](#tag/comatproto-queries/get/xrpc/com.atproto.sync.getRecord)
+- [`DAG-CBOR`](https://atproto.com/specs/data-model)-encoded response requires extra libraries to decode, but
+Clients can proxy atproto queries through their own PDS with [Service Proxying](https://atproto.com/specs/xrpc#service-proxying), and this is supported by Slingshot. The Slingshot instance must be started with the `--domain` argument specified.
+Where `<your pds>` is the user's own PDS host, and `<slingshot domain>` is the domain that the slingshot instance is deployed at (eg. `slingshot.microcosm.blue`). See the [Service Proxying](https://atproto.com/specs/xrpc#service-proxying) docs for more.
+> Service proxying is supported but completely optional. All APIs are directly accessible over the public internet, and GeoDNS helps route users to the closest instance to them for the lowest possible latency. (_note: deploying multiple slingshot instances with GeoDNS is still TODO_)
+- Slingshot also offers variants of the `getRecord` endpoints that accept a full `at-uri` as a parameter, to save clients from needing to parse and validate all parts of a record location.
+- Bi-directionally verifying identity endpoints, so you can directly exchange atproto [`handle`](https://atproto.com/guides/glossary#handle)s for [`DID`](https://atproto.com/guides/glossary#did-decentralized-id)s without extra steps, plus a convenient [Mini-Doc](#tag/slingshot-specific-queries/get/xrpc/com.bad-example.identity.resolveMiniDoc) verified identity summary.
+[Microcosm](https://www.microcosm.blue/) is a collection of services and independent community-run infrastructure for ATProtocol.
+Slingshot excels when combined with _shallow indexing_ services, which offer fast queries of global data relationships but with only references to the data records. Microcosm has a few!
+- [Constellation](https://constellation.microcosm.blue/), a global backlink index (all social interactions in atproto are links!)
+> All microcosm projects are [open source](https://tangled.sh/@bad-example.com/microcosm-links). **You can help sustain Slingshot** and all of microcosm by becoming a [Github sponsor](https://github.com/sponsors/uniphil/) or a [Ko-fi supporter](https://ko-fi.com/bad_example)!
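For concreteness, the `com.atproto.repo.getRecord` query described above is a single GET. A minimal sketch against the instance domain named in the doc, assuming the `reqwest` (blocking + json features) and `serde_json` crates, with illustrative record coordinates:

```rust
// Sketch: fetch one record through the standard com.atproto.repo.getRecord
// query. The repo/collection/rkey values here are only placeholders.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let record: serde_json::Value = reqwest::blocking::Client::new()
        .get("https://slingshot.microcosm.blue/xrpc/com.atproto.repo.getRecord")
        .query(&[
            ("repo", "did:plc:vc7f4oafdgxsihk4cry2xpze"),
            ("collection", "app.bsky.feed.post"),
            ("rkey", "3lgwdn7vd722r"),
        ])
        .send()?
        .error_for_status()?
        .json()?;
    // the response carries the record value alongside its uri and cid
    println!("{}", record["value"]);
    Ok(())
}
```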
+7
slingshot/readme.md
+67
-1
slingshot/src/error.rs
+6
-2
slingshot/src/firehose_cache.rs
+.with_file_size(16 * 2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records
+32
slingshot/src/healthcheck.rs
+525
slingshot/src/identity.rs
+/// 3. DID -> handle resolution: for bidirectional handle validation and in case we want to offer this
+use atrium_oauth::DefaultHttpClient; // it's probably not worth bringing all of atrium_oauth for this but
+/// just a lock to ensure only one refresher (queue consumer) is running (to be improved with a better refresher)
+.with_file_size(2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records
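A sketch of what the bidirectional validation mentioned in that first comment amounts to; both resolver closures are hypothetical stand-ins for slingshot's real resolution machinery:

```rust
// Sketch: a handle is only verified when handle -> DID and DID -> handle agree.
fn verify_handle(
    handle: &str,
    resolve_handle: impl Fn(&str) -> Option<String>, // handle -> DID (DNS TXT / HTTPS well-known)
    handle_from_did_doc: impl Fn(&str) -> Option<String>, // DID doc alsoKnownAs -> handle
) -> Option<String> {
    let did = resolve_handle(handle)?;
    // the DID document must claim the same handle back; otherwise anyone
    // could point a handle at a DID they don't control
    (handle_from_did_doc(&did)?.eq_ignore_ascii_case(handle)).then_some(did)
}
```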
+5
-1
slingshot/src/lib.rs
+89
-6
slingshot/src/main.rs
+120
-1
slingshot/src/record.rs
+// TODO: throttle outgoing requests by host probably, generally guard against outgoing requests
+.map_err(RecordError::StatusError)? // TODO atproto error handling (think about handling not found)
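One plausible shape for that per-host throttle TODO, sketched with tokio's semaphores; the type name and the per-host limit are illustrative, not slingshot's code:

```rust
// Sketch: bound concurrent outgoing requests per upstream PDS host by handing
// out one semaphore permit per in-flight request.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tokio::sync::Semaphore;

#[derive(Default)]
struct HostThrottle(Mutex<HashMap<String, Arc<Semaphore>>>);

impl HostThrottle {
    async fn acquire(&self, host: &str) -> tokio::sync::OwnedSemaphorePermit {
        let sem = self
            .0
            .lock()
            .unwrap()
            .entry(host.to_string())
            .or_insert_with(|| Arc::new(Semaphore::new(4))) // e.g. max 4 in-flight per host
            .clone(); // lock guard drops here, before the await below
        sem.acquire_owned().await.expect("semaphore is never closed")
    }
}
```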
+637
-46
slingshot/src/server.rs
+/// more convenient [request parameters](#tag/slingshot-specific-queries/GET/xrpc/com.bad-example.repo.getUriRecord)
+/// or [response formats](#tag/slingshot-specific-queries/GET/xrpc/com.bad-example.identity.resolveMiniDoc).
···
+/// > See also the [canonical `com.atproto` XRPC documentation](https://docs.bsky.app/docs/api/com-atproto-repo-get-record)
+/// Ergonomic complement to [`com.atproto.repo.getRecord`](https://docs.bsky.app/docs/api/com-atproto-repo-get-record)
+/// > See the [canonical `com.atproto` XRPC documentation](https://docs.bsky.app/docs/api/com-atproto-identity-resolve-handle)
+/// Like [com.atproto.identity.resolveIdentity](https://docs.bsky.app/docs/api/com-atproto-identity-resolve-identity)
···
+// but these are both not specified to do bidirectional validation, which is what we want to offer
+// we could do horrible things and implement resolveIdentity with only a stripped-down fake did doc
+// but this will *definitely* cause problems because eg. we're not currently storing pubkeys and
+/// - slingshot *may* implement more generous per-user rate-limits for proxied requests in the future
+async fn run<L>(listener: L, app: Route, shutdown: CancellationToken) -> Result<(), ServerError>
+67
slingshot/static/index.html
+<meta name="description" content="API Documentation for Slingshot, a firehose-listening atproto edge record and identity cache." />
+5
-5
spacedust/src/subscriber.rs
+1
-1
ufos/Cargo.toml
+42
-10
ufos/src/main.rs
+11
-1
ufos/src/storage.rs
+104
-20
ufos/src/storage_fjall.rs
-batch.remove(&self.rollups, &old_k); // TODO: when fjall gets weak delete, this will hopefully work way better
+// remove_weak is allowed here because the secondary ranking index only ever inserts once at a key
···
-batch.remove(&self.rollups, &old_k); // TODO: when fjall gets weak delete, this will hopefully work way better
+// remove_weak is allowed here because the secondary ranking index only ever inserts once at a key
···
-counter!("storage_trim_removed", "dangling" => "false").increment((total_deleted - total_danglers) as u64);
+counter!("storage_trim_removed", "dangling" => "false").increment((total_deleted - total_danglers) as u64);
+log::warn!("weird trim case: more danglers than deleted? metric will be missing for dangling=false. deleted={total_deleted} danglers={total_danglers}");
-196
ufos ops (move to micro-ops).md
-0 1/6 * * * rsync -ahP --delete "/mnt/ufos-db/.snapshots/$(sudo snapper --csvout -c ufos-db list --columns number | tail -n1)/snapshot/" backup/ufos/
-format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $upstream_cache_status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
-format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $upstream_cache_status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
-format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $upstream_cache_status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
+4
-4
who-am-i/src/server.rs