tracks lexicons and how many times they appeared on the jetstream


+3 -12
README.md
···
- a webapp and server that monitors the jetstream and tracks the different
- lexicons as they are created or deleted. it shows you which collections are most
- active on the network.
+ a webapp and server that monitors the jetstream and tracks the different lexicons as they are created or deleted.
+ it shows you which collections are most active on the network.
for backend it uses rust with fjall as db, the frontend is built with sveltekit.
see [here](https://gaze.systems/nsid-tracker) for a hosted instance of it.
- ## performance / storage
-
- it uses about 50MB of space for 620M recorded events (events being just
- timestamp in seconds and deleted boolean for now). and around 50-60ms for
- querying 300-400k events.
-
- this is on a machine with AMD EPYC 7281 (32) @ 2.100GHz.
-
## running
### with nix
- - build the server: `nix build git+https://tangled.sh/@poor.dog/nsid-tracker#server`
+ - run the server: `nix run git+https://tangled.sh/@poor.dog/nsid-tracker#server`
- build the client: `nix build git+https://tangled.sh/@poor.dog/nsid-tracker#client`
### manually
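(as a back-of-envelope check of the storage figures removed above, not a claim from the README itself: 50 MB over 620M events is 50e6 / 620e6 ≈ 0.08 bytes, i.e. roughly 0.65 bits per recorded event — plausible only because each hit is stored as a delta-of-delta varint timestamp plus a deleted flag, packed into Miniz-compressed blocks; see the encoder added in server/src/db_old/block.rs further down this diff.)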
-9
client/bun.lock
···
"name": "nsid-tracker",
"dependencies": {
"@number-flow/svelte": "^0.3.9",
- "svelte-adapter-bun": "^0.5.2",
},
"devDependencies": {
"@eslint/compat": "^1.2.5",
···
"globals": ["globals@16.3.0", "", {}, "sha512-bqWEnJ1Nt3neqx2q5SFfGS8r/ahumIakg3HcwtNlrVlwXIeNumWn/c7Pn/wKzGhf6SaW6H6uWXLqC30STCMchQ=="],
- "globalyzer": ["globalyzer@0.1.0", "", {}, "sha512-40oNTM9UfG6aBmuKxk/giHn5nQ8RVz/SS4Ir6zgzOv9/qC3kKZ9v4etGTcJbEl/NyVQH7FGU7d+X1egr57Md2Q=="],
-
- "globrex": ["globrex@0.1.2", "", {}, "sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg=="],
-
"graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="],
"graphemer": ["graphemer@1.4.0", "", {}, "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag=="],
···
"svelte": ["svelte@5.36.8", "", { "dependencies": { "@ampproject/remapping": "^2.3.0", "@jridgewell/sourcemap-codec": "^1.5.0", "@sveltejs/acorn-typescript": "^1.0.5", "@types/estree": "^1.0.5", "acorn": "^8.12.1", "aria-query": "^5.3.1", "axobject-query": "^4.1.0", "clsx": "^2.1.1", "esm-env": "^1.2.1", "esrap": "^2.1.0", "is-reference": "^3.0.3", "locate-character": "^3.0.0", "magic-string": "^0.30.11", "zimmerframe": "^1.1.2" } }, "sha512-8JbZWQu96hMjH/oYQPxXW6taeC6Awl6muGHeZzJTxQx7NGRQ/J9wN1hkzRKLOlSDlbS2igiFg7p5xyTp5uXG3A=="],
- "svelte-adapter-bun": ["svelte-adapter-bun@0.5.2", "", { "dependencies": { "tiny-glob": "^0.2.9" } }, "sha512-xEtFgaal6UgrCwwkSIcapO9kopoFNUYCYqyKCikdqxX9bz2TDYnrWQZ7qBnkunMxi1HOIERUCvTcebYGiarZLA=="],
-
"svelte-check": ["svelte-check@4.3.0", "", { "dependencies": { "@jridgewell/trace-mapping": "^0.3.25", "chokidar": "^4.0.1", "fdir": "^6.2.0", "picocolors": "^1.0.0", "sade": "^1.7.4" }, "peerDependencies": { "svelte": "^4.0.0 || ^5.0.0-next.0", "typescript": ">=5.0.0" }, "bin": { "svelte-check": "bin/svelte-check" } }, "sha512-Iz8dFXzBNAM7XlEIsUjUGQhbEE+Pvv9odb9+0+ITTgFWZBGeJRRYqHUUglwe2EkLD5LIsQaAc4IUJyvtKuOO5w=="],
"svelte-eslint-parser": ["svelte-eslint-parser@1.3.0", "", { "dependencies": { "eslint-scope": "^8.2.0", "eslint-visitor-keys": "^4.0.0", "espree": "^10.0.0", "postcss": "^8.4.49", "postcss-scss": "^4.0.9", "postcss-selector-parser": "^7.0.0" }, "peerDependencies": { "svelte": "^3.37.0 || ^4.0.0 || ^5.0.0" }, "optionalPeers": ["svelte"] }, "sha512-VCgMHKV7UtOGcGLGNFSbmdm6kEKjtzo5nnpGU/mnx4OsFY6bZ7QwRF5DUx+Hokw5Lvdyo8dpk8B1m8mliomrNg=="],
···
"tapable": ["tapable@2.2.2", "", {}, "sha512-Re10+NauLTMCudc7T5WLFLAwDhQ0JWdrMK+9B2M8zR5hRExKmsRDCBA7/aV/pNJFltmBFO5BAMlQFi/vq3nKOg=="],
"tar": ["tar@7.4.3", "", { "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", "minizlib": "^3.0.1", "mkdirp": "^3.0.1", "yallist": "^5.0.0" } }, "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw=="],
-
- "tiny-glob": ["tiny-glob@0.2.9", "", { "dependencies": { "globalyzer": "0.1.0", "globrex": "^0.1.2" } }, "sha512-g/55ssRPUjShh+xkfx9UPDXqhckHEsHr4Vd9zX55oSdGZc/MD0m3sferOkwWtp98bv+kcVfEHtRJgBVJzelrzg=="],
"tinyglobby": ["tinyglobby@0.2.14", "", { "dependencies": { "fdir": "^6.4.4", "picomatch": "^4.0.2" } }, "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ=="],
+1 -2
client/package.json
···
},
"type": "module",
"dependencies": {
- "@number-flow/svelte": "^0.3.9",
- "svelte-adapter-bun": "^0.5.2"
+ "@number-flow/svelte": "^0.3.9"
}
}
-4
client/src/app.css
···
overflow-y: overlay;
overflow-y: auto; /* Fallback for browsers that don't support overlay */
}
-
- .wsbadge {
- @apply text-sm font-semibold mt-1.5 px-2.5 py-0.5 rounded-full border;
- }
+9 -9
client/src/app.html
···
<!doctype html>
<html lang="en">
- <head>
- <meta charset="utf-8" />
- <link rel="icon" href="%sveltekit.assets%/favicon.svg" />
- <meta name="viewport" content="width=device-width, initial-scale=1" />
- %sveltekit.head%
- </head>
- <body class="bg-white dark:bg-gray-900" data-sveltekit-preload-data="hover">
- <div style="display: contents">%sveltekit.body%</div>
- </body>
+ <head>
+ <meta charset="utf-8" />
+ <link rel="icon" href="%sveltekit.assets%/favicon.svg" />
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
+ %sveltekit.head%
+ </head>
+ <body data-sveltekit-preload-data="hover">
+ <div style="display: contents">%sveltekit.body%</div>
+ </body>
</html>
+9 -2
client/src/lib/components/BskyToggle.svelte
···
<!-- svelte-ignore a11y_no_static_element_interactions -->
<button
onclick={onBskyToggle}
- class="wsbadge !mt-0 !font-normal bg-blue-100 dark:bg-blue-900 hover:bg-blue-200 dark:hover:bg-blue-800 border-blue-300 dark:border-blue-700"
+ class="wsbadge !mt-0 !font-normal bg-yellow-100 hover:bg-yellow-200 border-yellow-300"
>
<input checked={dontShowBsky} type="checkbox" />
- <span class="ml-0.5 text-black dark:text-gray-200"> hide app.bsky.* </span>
+ <span class="ml-0.5"> hide app.bsky.* </span>
</button>
+
+ <style lang="postcss">
+ @reference "../../app.css";
+ .wsbadge {
+ @apply text-sm font-semibold mt-1.5 px-2.5 py-0.5 rounded-full border;
+ }
+ </style>
+5 -8
client/src/lib/components/EventCard.svelte
···
</script>
<div
- class="group flex flex-col gap-2 p-1.5 md:p-3 min-h-64 bg-white dark:bg-gray-800/50 border border-gray-200 dark:border-gray-950 rounded-lg hover:shadow-lg md:hover:-translate-y-1 transition-all duration-200 transform"
+ class="group flex flex-col gap-2 p-1.5 md:p-3 min-h-64 bg-white border border-gray-200 rounded-lg hover:shadow-lg md:hover:-translate-y-1 transition-all duration-200 transform"
class:has-activity={isAnimating}
style="--border-thickness: {borderThickness}px"
>
<div class="flex items-start gap-2">
<div
- class="text-sm font-bold text-blue-600 bg-blue-100 dark:bg-indigo-950 px-3 py-1 rounded-full"
+ class="text-sm font-bold text-blue-600 bg-blue-100 px-3 py-1 rounded-full"
>
#{index + 1}
</div>
<div
title={event.nsid}
- class="font-mono text-sm text-gray-700 dark:text-gray-300 mt-0.5 leading-relaxed rounded-full text-nowrap text-ellipsis overflow-hidden group-hover:overflow-visible group-hover:bg-gray-50 dark:group-hover:bg-gray-700 border-gray-100 dark:border-gray-900 group-hover:border transition-all px-1"
+ class="font-mono text-sm text-gray-700 mt-0.5 leading-relaxed rounded-full text-nowrap text-ellipsis overflow-hidden group-hover:overflow-visible group-hover:bg-gray-50 border-gray-100 group-hover:border transition-all px-1"
>
{event.nsid}
</div>
···
</div>
</div>
- <style lang="postcss">
+ <style>
.has-activity {
position: relative;
transition: all 0.2s ease-out;
}
.has-activity::before {
- @reference "../../app.css";
- @apply border-blue-500 dark:border-blue-800;
content: "";
position: absolute;
top: calc(-1 * var(--border-thickness));
left: calc(-1 * var(--border-thickness));
right: calc(-1 * var(--border-thickness));
bottom: calc(-1 * var(--border-thickness));
- border-width: var(--border-thickness);
- border-style: solid;
+ border: var(--border-thickness) solid rgba(59, 130, 246, 0.8);
border-radius: calc(0.5rem + var(--border-thickness));
pointer-events: none;
transition: all 0.3s ease-out;
+10 -5
client/src/lib/components/FilterControls.svelte
···
</script>
<div
- class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-blue-100 dark:bg-blue-900 hover:bg-blue-200 dark:hover:bg-blue-800 border-blue-300 dark:border-blue-700"
+ class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-blue-100 hover:bg-blue-200 border-blue-300"
>
- <label for="filter-regex" class="text-blue-800 dark:text-gray-200 mr-1">
- filter:
- </label>
+ <label for="filter-regex" class="text-blue-800 mr-1"> filter: </label>
<input
id="filter-regex"
value={filterRegex}
oninput={(e) => onFilterChange((e.target as HTMLInputElement).value)}
type="text"
placeholder="regex..."
- class="bg-blue-50 dark:bg-blue-950 text-blue-900 dark:text-gray-400 placeholder-blue-400 dark:placeholder-blue-700 border border-blue-200 dark:border-blue-700 rounded-full px-1 outline-none focus:border-blue-400 min-w-0 w-24"
+ class="bg-blue-50 text-blue-900 placeholder-blue-400 border border-blue-200 rounded-full px-1 outline-none focus:bg-white focus:border-blue-400 min-w-0 w-24"
/>
</div>
+
+ <style lang="postcss">
+ @reference "../../app.css";
+ .wsbadge {
+ @apply text-sm font-semibold mt-1.5 px-2.5 py-0.5 rounded-full border;
+ }
+ </style>
+11 -6
client/src/lib/components/RefreshControl.svelte
···
</script>
<div
- class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-lime-100 dark:bg-lime-900 dark:hover:bg-lime-800 hover:bg-lime-200 border-lime-300 dark:border-lime-700"
+ class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-green-100 hover:bg-green-200 border-green-300"
>
- <label for="refresh-rate" class="text-lime-800 dark:text-lime-200 mr-1"
- >refresh:</label
- >
+ <label for="refresh-rate" class="text-green-800 mr-1">refresh:</label>
<input
id="refresh-rate"
value={refreshRate}
···
pattern="[0-9]*"
min="0"
placeholder="real-time"
- class="bg-green-50 dark:bg-green-900 text-lime-900 dark:text-lime-200 placeholder-lime-600 dark:placeholder-lime-400 border border-lime-200 dark:border-lime-700 rounded-full px-1 outline-none focus:border-lime-400 min-w-0 w-20"
+ class="bg-green-50 text-green-900 placeholder-green-400 border border-green-200 rounded-full px-1 outline-none focus:bg-white focus:border-green-400 min-w-0 w-20"
/>
- <span class="text-lime-800 dark:text-lime-200">s</span>
+ <span class="text-green-700">s</span>
</div>
+
+ <style lang="postcss">
+ @reference "../../app.css";
+ .wsbadge {
+ @apply text-sm font-semibold mt-1.5 px-2.5 py-0.5 rounded-full border;
+ }
+ </style>
-31
client/src/lib/components/ShowControls.svelte
···
- <script lang="ts">
- import type { ShowOption } from "$lib/types";
-
- interface Props {
- show: ShowOption;
- onShowChange: (value: ShowOption) => void;
- }
-
- let { show, onShowChange }: Props = $props();
-
- const showOptions: ShowOption[] = ["server init", "stream start"];
- </script>
-
- <div
- class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-pink-100 dark:bg-pink-800 hover:bg-pink-200 dark:hover:bg-pink-700 border-pink-300 dark:border-pink-700"
- >
- <label for="show" class="text-pink-800 dark:text-pink-100 mr-1">
- show since:
- </label>
- <select
- id="show"
- value={show}
- onchange={(e) =>
- onShowChange((e.target as HTMLSelectElement).value as ShowOption)}
- class="bg-pink-50 dark:bg-pink-900 text-pink-900 dark:text-pink-100 border border-pink-200 dark:border-pink-700 rounded-full px-1 outline-none focus:border-pink-400 min-w-0"
- >
- {#each showOptions as option}
- <option value={option}>{option}</option>
- {/each}
- </select>
- </div>
+10 -5
client/src/lib/components/SortControls.svelte
···
</script>
<div
- class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-purple-100 dark:bg-purple-800 hover:bg-purple-200 dark:hover:bg-purple-700 border-purple-300 dark:border-purple-700"
+ class="wsbadge !pl-2 !px-1 !mt-0 !font-normal bg-purple-100 hover:bg-purple-200 border-purple-300"
>
- <label for="sort-by" class="text-purple-800 dark:text-purple-300 mr-1">
- sort by:
- </label>
+ <label for="sort-by" class="text-purple-800 mr-1"> sort by: </label>
<select
id="sort-by"
value={sortBy}
onchange={(e) =>
onSortChange((e.target as HTMLSelectElement).value as SortOption)}
- class="bg-purple-50 dark:bg-purple-900 text-purple-900 dark:text-purple-300 border border-purple-200 dark:border-purple-700 rounded-full px-1 outline-none focus:border-purple-400 min-w-0"
+ class="bg-purple-50 text-purple-900 border border-purple-200 rounded-full px-1 outline-none focus:bg-white focus:border-purple-400 min-w-0"
>
{#each sortOptions as option}
<option value={option.value}>{option.label}</option>
{/each}
</select>
</div>
+
+ <style lang="postcss">
+ @reference "../../app.css";
+ .wsbadge {
+ @apply text-sm font-semibold mt-1.5 px-2.5 py-0.5 rounded-full border;
+ }
+ </style>
+18 -17
client/src/lib/components/StatsCard.svelte
···
<script lang="ts">
import { formatNumber } from "$lib/format";
+ import NumberFlow from "@number-flow/svelte";
const colorClasses = {
green: {
- bg: "from-green-50 to-green-100 dark:from-green-900 dark:to-green-800",
- border: "border-green-200 dark:border-green-800",
- titleText: "text-green-700 dark:text-green-400",
- valueText: "text-green-900 dark:text-green-200",
+ bg: "from-green-50 to-green-100",
+ border: "border-green-200",
+ titleText: "text-green-700",
+ valueText: "text-green-900",
},
red: {
- bg: "from-red-50 to-red-100 dark:from-red-900 dark:to-red-800",
- border: "border-red-200 dark:border-red-800",
- titleText: "text-red-700 dark:text-red-400",
- valueText: "text-red-900 dark:text-red-200",
+ bg: "from-red-50 to-red-100",
+ border: "border-red-200",
+ titleText: "text-red-700",
+ valueText: "text-red-900",
},
turqoise: {
- bg: "from-teal-50 to-teal-100 dark:from-teal-900 dark:to-teal-800",
- border: "border-teal-200 dark:border-teal-800",
- titleText: "text-teal-700 dark:text-teal-400",
- valueText: "text-teal-900 dark:text-teal-200",
+ bg: "from-teal-50 to-teal-100",
+ border: "border-teal-200",
+ titleText: "text-teal-700",
+ valueText: "text-teal-900",
},
orange: {
- bg: "from-orange-50 to-orange-100 dark:from-orange-900 dark:to-orange-800",
- border: "border-orange-200 dark:border-orange-800",
- titleText: "text-orange-700 dark:text-orange-400",
- valueText: "text-orange-900 dark:text-orange-200",
+ bg: "from-orange-50 to-orange-100",
+ border: "border-orange-200",
+ titleText: "text-orange-700",
+ valueText: "text-orange-900",
},
};
···
{title}
</h3>
<p class="text-xl md:text-2xl font-bold {colors.valueText}">
- {formatNumber(value)}
+ <NumberFlow {value} />
</p>
</div>
+14 -18
client/src/lib/components/StatusBadge.svelte
···
const statusConfig = {
connected: {
text: "stream live",
- classes:
- "bg-green-100 dark:bg-green-900 text-green-800 dark:text-green-200 border-green-200 dark:border-green-800",
+ classes: "bg-green-100 text-green-800 border-green-200",
},
connecting: {
text: "stream connecting",
- classes:
- "bg-yellow-100 dark:bg-yellow-900 text-yellow-800 dark:text-yellow-200 border-yellow-200 dark:border-yellow-800",
+ classes: "bg-yellow-100 text-yellow-800 border-yellow-200",
},
error: {
text: "stream errored",
- classes:
- "bg-red-100 dark:bg-red-900 text-red-800 dark:text-red-200 border-red-200 dark:border-red-800",
+ classes: "bg-red-100 text-red-800 border-red-200",
},
disconnected: {
text: "stream offline",
- classes:
- "bg-gray-100 dark:bg-gray-900 text-gray-800 dark:text-gray-200 border-gray-200 dark:border-gray-800",
+ classes: "bg-gray-100 text-gray-800 border-gray-200",
},
};
const config = $derived(statusConfig[status]);
</script>
- <div class="flex flex-row items-center gap-2 wsbadge {config.classes}">
- <!-- connecting spinner -->
- {#if status === "connecting"}
- <div
- class="animate-spin rounded-full h-4 w-4 border-b-2 border-yellow-800 dark:border-yellow-200"
- ></div>
- {/if}
- <!-- status text -->
- <span>{config.text}</span>
- </div>
+ <span class="wsbadge {config.classes}">
+ {config.text}
+ </span>
+
+ <style lang="postcss">
+ @reference "../../app.css";
+ .wsbadge {
+ @apply text-sm font-semibold mt-1.5 px-2.5 py-0.5 rounded-full border;
+ }
+ </style>
+1 -5
client/src/lib/format.ts
···
return num.toLocaleString();
};
- const isValidDate = (d: Date) => d instanceof Date && !isNaN(d.getTime());
export const formatTimestamp = (timestamp: number): string => {
- const date = new Date(timestamp * 1000);
- return isValidDate(date)
- ? date.toLocaleString()
- : new Date(timestamp / 1000).toLocaleString();
+ return new Date(timestamp / 1000).toLocaleString();
};
-1
client/src/lib/types.ts
···
};
export type SortOption = "total" | "created" | "deleted" | "date";
- export type ShowOption = "server init" | "stream start";
+2 -3
client/src/routes/+layout.ts
···
- export const prerender = false;
- export const ssr = true;
- export const csr = true;
+ export const prerender = true;
+ export const ssr = false;
-7
client/src/routes/+page.server.ts
···
- import { fetchEvents, fetchTrackingSince } from "$lib/api";
-
- export const load = async () => {
- const events = await fetchEvents();
- const trackingSince = await fetchTrackingSince();
- return { events, trackingSince };
- };
+51 -109
client/src/routes/+page.svelte
···
<script lang="ts">
import { dev } from "$app/environment";
- import type {
- EventRecord,
- Events,
- NsidCount,
- ShowOption,
- Since,
- SortOption,
- } from "$lib/types";
+ import type { EventRecord, NsidCount, SortOption } from "$lib/types";
import { onMount, onDestroy } from "svelte";
- import { get, writable } from "svelte/store";
+ import { writable } from "svelte/store";
import { PUBLIC_API_URL } from "$env/static/public";
import { fetchEvents, fetchTrackingSince } from "$lib/api";
import { createRegexFilter } from "$lib/filter";
···
import BskyToggle from "$lib/components/BskyToggle.svelte";
import RefreshControl from "$lib/components/RefreshControl.svelte";
import { formatTimestamp } from "$lib/format";
- import ShowControls from "$lib/components/ShowControls.svelte";
- type Props = {
- data: { events: Events; trackingSince: Since };
- };
-
- const { data }: Props = $props();
-
- const events = writable(
- new Map<string, EventRecord>(Object.entries(data.events.events)),
- );
- const eventsStart = new Map<string, EventRecord>(
- Object.entries(data.events.events),
- );
+ const events = writable(new Map<string, EventRecord>());
const pendingUpdates = new Map<string, EventRecord>();
-
- let updateTimer: NodeJS.Timeout | null = null;
- let per_second = $state(data.events.per_second);
- let tracking_since = $state(data.trackingSince.since);
-
- const diffEvents = (
- oldEvents: Map<string, EventRecord>,
- newEvents: Map<string, EventRecord>,
- ): NsidCount[] => {
- const nsidCounts: NsidCount[] = [];
- for (const [nsid, event] of newEvents.entries()) {
- const oldEvent = oldEvents.get(nsid);
- if (oldEvent) {
- const counts = {
- nsid,
- count: event.count - oldEvent.count,
- deleted_count: event.deleted_count - oldEvent.deleted_count,
- last_seen: event.last_seen,
- };
- if (counts.count > 0 || counts.deleted_count > 0)
- nsidCounts.push(counts);
- } else {
- nsidCounts.push({
- nsid,
- ...event,
- });
- }
- }
- return nsidCounts;
- };
- const applyEvents = (newEvents: Record<string, EventRecord>) => {
- events.update((map) => {
- for (const [nsid, event] of Object.entries(newEvents)) {
- map.set(nsid, event);
- }
- return map;
- });
- };
-
- let error: string | null = $state(null);
- let filterRegex = $state("");
- let dontShowBsky = $state(false);
- let sortBy: SortOption = $state("total");
- let refreshRate = $state("");
- let changedByUser = $state(false);
- let show: ShowOption = $state("server init");
let eventsList: NsidCount[] = $state([]);
- let updateEventsList = $derived((value: Map<string, EventRecord>) => {
- switch (show) {
- case "server init":
- eventsList = value
- .entries()
- .map(([nsid, event]) => ({
- nsid,
- ...event,
- }))
- .toArray();
- break;
- case "stream start":
- eventsList = diffEvents(eventsStart, value);
- break;
- }
+ let updateTimer: NodeJS.Timeout | null = null;
+ events.subscribe((value) => {
+ eventsList = value
+ .entries()
+ .map(([nsid, event]) => ({
+ nsid,
+ ...event,
+ }))
+ .toArray();
});
- events.subscribe((value) => updateEventsList(value));
+ let per_second = $state(0);
+ let tracking_since = $state(0);
+
let all: EventRecord = $derived(
eventsList.reduce(
(acc, event) => {
···
},
),
);
+ let error: string | null = $state(null);
+ let filterRegex = $state("");
+ let dontShowBsky = $state(false);
+ let sortBy: SortOption = $state("total");
+ let refreshRate = $state("");
+ let changedByUser = $state(false);
let websocket: WebSocket | null = null;
let isStreamOpen = $state(false);
···
};
websocket.onmessage = async (event) => {
const jsonData = JSON.parse(event.data);
- per_second = jsonData.per_second;
+
+ if (jsonData.per_second > 0) {
+ per_second = jsonData.per_second;
+ }
+
+ // Store updates in pending map if refresh rate is set
if (refreshRate) {
for (const [nsid, event] of Object.entries(jsonData.events)) {
pendingUpdates.set(nsid, event as EventRecord);
}
} else {
- applyEvents(jsonData.events);
+ // Apply updates immediately if no refresh rate
+ events.update((map) => {
+ for (const [nsid, event] of Object.entries(
+ jsonData.events,
+ )) {
+ map.set(nsid, event as EventRecord);
+ }
+ return map;
+ });
}
};
websocket.onerror = (error) => {
···
error = null;
const data = await fetchEvents();
per_second = data.per_second;
- applyEvents(data.events);
+ events.update((map) => {
+ for (const [nsid, event] of Object.entries(data.events)) {
+ map.set(nsid, event);
+ }
+ return map;
+ });
tracking_since = (await fetchTrackingSince()).since;
} catch (err) {
error =
···
/>
</svelte:head>
- <header
- class="bg-white dark:bg-gray-900 border-gray-300 dark:border-gray-950 border-b mb-4 pb-2"
- >
+ <header class="border-gray-300 border-b mb-4 pb-2">
<div
class="px-2 md:ml-[19vw] mx-auto flex flex-wrap items-center text-center"
>
- <h1 class="text-4xl font-bold mr-4 text-gray-900 dark:text-gray-200">
- lexicon tracker
- </h1>
- <p class="text-lg mt-1 text-gray-600 dark:text-gray-300">
+ <h1 class="text-4xl font-bold mr-4 text-gray-900">lexicon tracker</h1>
+ <p class="text-lg mt-1 text-gray-600">
tracks lexicons seen on the jetstream {tracking_since === 0
? ""
: `(since: ${formatTimestamp(tracking_since)})`}
</p>
</div>
</header>
- <div class="bg-white dark:bg-gray-900 md:max-w-[61vw] mx-auto p-2">
+ <div class="md:max-w-[61vw] mx-auto p-2">
<div class="min-w-fit grid grid-cols-2 xl:grid-cols-4 gap-2 2xl:gap-6 mb-8">
<StatsCard
title="total creation"
···
{#if error}
<div
- class="bg-red-100 dark:bg-red-900 border border-red-300 dark:border-red-700 text-red-700 dark:text-red-200 px-4 py-3 rounded-lg mb-6"
+ class="bg-red-100 border border-red-300 text-red-700 px-4 py-3 rounded-lg mb-6"
>
<p>Error: {error}</p>
</div>
···
{#if eventsList.length > 0}
<div class="mb-8">
<div class="flex flex-wrap items-center gap-3 mb-3">
- <h2 class="text-2xl font-bold text-gray-900 dark:text-gray-200">
- seen lexicons
- </h2>
+ <h2 class="text-2xl font-bold text-gray-900">seen lexicons</h2>
<StatusBadge status={websocketStatus} />
</div>
<div class="flex flex-wrap items-center gap-1.5 mb-6">
···
refreshRate = "";
}}
/>
- <ShowControls
- {show}
- onShowChange={(value: ShowOption) => {
- show = value;
- updateEventsList(get(events));
- }}
- />
<RefreshControl
{refreshRate}
onRefreshChange={(value) => {
···
{/if}
</div>
- <footer class="py-2 border-t border-gray-200 dark:border-gray-800 text-center">
- <p class="text-gray-600 dark:text-gray-200 text-sm">
+ <footer class="py-2 border-t border-gray-200 text-center">
+ <p class="text-gray-600 text-sm">
source code <a
href="https://tangled.sh/@poor.dog/nsid-tracker"
target="_blank"
rel="noopener noreferrer"
- class="text-blue-600 dark:text-blue-400 hover:text-blue-800 dark:hover:text-blue-600 underline"
+ class="text-blue-600 hover:text-blue-800 underline"
>@poor.dog/nsid-tracker</a
>
</p>
+1 -1
client/svelte.config.js
···
- import adapter from "svelte-adapter-bun";
+ import adapter from "@sveltejs/adapter-static";
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
/** @type {import('@sveltejs/kit').Config} */
+1 -1
nix/client-modules.nix
···
src = ../client;
- outputHash = "sha256-njwXk3u0NUsYWLv9EOdCltgQOjTVkcfu+D+0COSw/6I=";
+ outputHash = "sha256-t8PJFo+3XGkzmMNbw9Rf9cS5Ob5YtI8ucX3ay+u9a3M=";
outputHashAlgo = "sha256";
outputHashMode = "recursive";
+2 -10
nix/client.nix
···
{
- lib,
stdenv,
makeBinaryWrapper,
bun,
···
'';
buildPhase = ''
runHook preBuild
- bun --prefer-offline run build
+ bun --prefer-offline run --bun build
runHook postBuild
'';
installPhase = ''
runHook preInstall
-
- mkdir -p $out/bin
+ mkdir -p $out
cp -R ./build/* $out
- cp -R ./node_modules $out
-
- makeBinaryWrapper ${bun}/bin/bun $out/bin/website \
- --prefix PATH : ${lib.makeBinPath [ bun ]} \
- --add-flags "run --bun --no-install --cwd $out start"
-
runHook postInstall
'';
}
+28 -42
server/Cargo.lock
···
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
- name = "ahash"
- version = "0.8.12"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
- dependencies = [
- "cfg-if",
- "getrandom 0.3.3",
- "once_cell",
- "serde",
- "version_check",
- "zerocopy",
- ]
-
- [[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
-
- [[package]]
- name = "arc-swap"
- version = "1.7.1"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "async-compression"
···
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
+ [[package]]
+ name = "cmake"
+ version = "0.1.54"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+ checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
+ dependencies = [
+ "cc",
+ ]
[[package]]
name = "combine"
···
name = "server"
version = "0.1.0"
dependencies = [
- "ahash",
"anyhow",
- "arc-swap",
"async-trait",
"axum",
"axum-tws",
···
"serde",
"serde_json",
"smol_str",
+ "snmalloc-rs",
"threadpool",
"tikv-jemallocator",
"tokio",
···
dependencies = [
"borsh",
"serde",
+ ]
+
+ [[package]]
+ name = "snmalloc-rs"
+ version = "0.3.8"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+ checksum = "eb317153089fdfa4d8a2eec059d40a5a23c3bde43995ea23b19121c3f621e74a"
+ dependencies = [
+ "snmalloc-sys",
+ ]
+
+ [[package]]
+ name = "snmalloc-sys"
+ version = "0.3.8"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+ checksum = "065fea53d32bb77bc36cca466cb191f2e5216ebfd0ed360b1d64889ee6e559ea"
+ dependencies = [
+ "cmake",
[[package]]
···
version = "0.8.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
-
- [[package]]
- name = "zerocopy"
- version = "0.8.26"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
- dependencies = [
- "zerocopy-derive",
- ]
-
- [[package]]
- name = "zerocopy-derive"
- version = "0.8.26"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
- dependencies = [
- "proc-macro2",
- "quote",
- "syn",
- ]
[[package]]
name = "zeroize"
+2 -2
server/Cargo.toml
···
rayon = "1.10.0"
parking_lot = { version = "0.12", features = ["send_guard", "hardware-lock-elision"] }
rclite = "0.2.7"
- arc-swap = "1.7.1"
- ahash = { version = "0.8.12", features = ["serde"] }
+ [target.'cfg(target_env = "msvc")'.dependencies]
+ snmalloc-rs = "0.3.8"
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.6"
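for context: the diff keeps jemalloc on non-msvc targets and swaps in snmalloc for msvc. the matching allocator selection is not part of this diff, but the conventional wiring for a cfg-gated pair like this is a `#[global_allocator]` in main.rs; a sketch of that standard pattern:

```rust
// Sketch of the conventional cfg-gated allocator setup implied by the
// Cargo.toml targets above; the repo's actual main.rs is not shown here.
#[cfg(target_env = "msvc")]
#[global_allocator]
static GLOBAL: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;

#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
```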
+18 -16
server/src/api.rs
···
use std::{
+ collections::HashMap,
fmt::Display,
net::SocketAddr,
ops::{Bound, Deref, RangeBounds},
time::Duration,
};
- use ahash::AHashMap;
use anyhow::anyhow;
use axum::{
Json, Router,
···
#[derive(Serialize)]
struct Events {
per_second: usize,
- events: AHashMap<SmolStr, NsidCount>,
+ events: HashMap<SmolStr, NsidCount>,
}
async fn events(db: State<Arc<Db>>) -> AppResult<Json<Events>> {
- let mut events = AHashMap::new();
+ let mut events = HashMap::new();
for result in db.get_counts() {
let (nsid, counts) = result?;
events.insert(
···
) -> AppResult<Json<Vec<Hit>>> {
let from = params.to.map(Bound::Included).unwrap_or(Bound::Unbounded);
let to = params.from.map(Bound::Included).unwrap_or(Bound::Unbounded);
+ let maybe_hits = db
+ .get_hits(&params.nsid, HitsRange { from, to })
+ .take(MAX_HITS);
+ let mut hits = Vec::with_capacity(maybe_hits.size_hint().0);
- db.get_hits(&params.nsid, HitsRange { from, to }, MAX_HITS)
- .take(MAX_HITS)
- .try_fold(Vec::with_capacity(MAX_HITS), |mut acc, hit| {
- let hit = hit?;
- let hit_data = hit.deser()?;
+ for maybe_hit in maybe_hits {
+ let hit = maybe_hit?;
+ let hit_data = hit.deser()?;
- acc.push(Hit {
- timestamp: hit.timestamp,
- deleted: hit_data.deleted,
- });
- Ok(acc)
- })
- .map(Json)
+ hits.push(Hit {
+ timestamp: hit.timestamp,
+ deleted: hit_data.deleted,
+ });
+ }
+
+ Ok(Json(hits))
}
async fn stream_events(db: State<Arc<Db>>, ws: WebSocketUpgrade) -> Response {
···
(async move {
let mut listener = db.new_listener();
let mut data = Events {
- events: AHashMap::<SmolStr, NsidCount>::with_capacity(10),
+ events: HashMap::<SmolStr, NsidCount>::with_capacity(10),
per_second: 0,
};
let mut updates = 0;
+26 -54
server/src/db/handle.rs
···
use std::{
fmt::Debug,
io::Cursor,
- ops::{Bound, RangeBounds},
+ ops::{Bound, Deref, RangeBounds},
sync::atomic::{AtomicU64, Ordering as AtomicOrdering},
time::Duration,
};
use byteview::ByteView;
- use fjall::{Keyspace, Partition, PartitionCreateOptions, Slice, Snapshot};
+ use fjall::{Keyspace, Partition, PartitionCreateOptions, Slice};
use itertools::Itertools;
use parking_lot::Mutex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
···
use crate::{
db::{EventRecord, NsidHit, block},
- error::{AppError, AppResult},
- utils::{
- ArcRefCnt, ArcliteSwap, CLOCK, DefaultRateTracker, RateTracker, ReadVariableExt,
- varints_unsigned_encoded,
- },
+ error::AppResult,
+ utils::{CLOCK, DefaultRateTracker, RateTracker, ReadVariableExt, varints_unsigned_encoded},
};
pub type ItemDecoder = block::ItemDecoder<Cursor<Slice>, NsidHit>;
···
}
pub struct LexiconHandle {
- write_tree: Partition,
- read_tree: ArcliteSwap<Snapshot>,
+ tree: Partition,
nsid: SmolStr,
buf: Arc<Mutex<Vec<EventRecord>>>,
last_insert: AtomicU64, // relaxed
···
}
}
+ impl Deref for LexiconHandle {
+ type Target = Partition;
+
+ fn deref(&self) -> &Self::Target {
+ &self.tree
+ }
+ }
+
impl LexiconHandle {
pub fn new(keyspace: &Keyspace, nsid: &str) -> Self {
let opts = PartitionCreateOptions::default()
- .block_size(1024 * 48)
+ .block_size(1024 * 128)
.compression(fjall::CompressionType::Miniz(9));
- let write_tree = keyspace.open_partition(nsid, opts).unwrap();
- let read_tree = ArcliteSwap::new(ArcRefCnt::new(write_tree.snapshot()));
Self {
- write_tree,
- read_tree,
+ tree: keyspace.open_partition(nsid, opts).unwrap(),
nsid: nsid.into(),
buf: Default::default(),
last_insert: AtomicU64::new(0),
···
}
}
- #[inline(always)]
- pub fn read(&self) -> arc_swap::Guard<ArcRefCnt<Snapshot>> {
- self.read_tree.load()
- }
-
- #[inline(always)]
- pub fn update_tree(&self) {
- self.read_tree
- .store(ArcRefCnt::new(self.write_tree.snapshot()));
- }
-
- #[inline(always)]
- pub fn span(&self) -> tracing::Span {
- tracing::info_span!("handle", nsid = %self.nsid)
- }
-
- #[inline(always)]
pub fn nsid(&self) -> &SmolStr {
&self.nsid
}
- #[inline(always)]
pub fn item_count(&self) -> usize {
self.buf.lock().len()
}
- pub fn since_last_activity(&self) -> Duration {
- Duration::from_nanos(
- CLOCK.delta_as_nanos(self.last_insert.load(AtomicOrdering::Relaxed), CLOCK.raw()),
- )
+ pub fn since_last_activity(&self) -> u64 {
+ CLOCK.delta_as_nanos(self.last_insert.load(AtomicOrdering::Relaxed), CLOCK.raw())
}
pub fn suggested_block_size(&self) -> usize {
···
range: impl RangeBounds<u64>,
sort: bool,
) -> AppResult<()> {
- let _span = self.span().entered();
-
let start_limit = match range.start_bound().cloned() {
Bound::Included(start) => start,
Bound::Excluded(start) => start.saturating_add(1),
···
let end_key = varints_unsigned_encoded([end_limit]);
let blocks_to_compact = self
- .read()
+ .tree
.range(start_key..end_key)
.collect::<Result<Vec<_>, _>>()?;
if blocks_to_compact.len() < 2 {
+ tracing::info!("{}: nothing to compact", self.nsid);
return Ok(());
}
···
let end_blocks_size = new_blocks.len();
for key in keys_to_delete {
- self.write_tree.remove(key.clone())?;
+ self.tree.remove(key.clone())?;
}
for block in new_blocks {
- self.write_tree.insert(block.key, block.data)?;
+ self.tree.insert(block.key, block.data)?;
}
- let reduction =
- ((start_blocks_size - end_blocks_size) as f64 / start_blocks_size as f64) * 100.0;
tracing::info!(
- {
- start = start_blocks_size,
- end = end_blocks_size,
- },
- "blocks compacted {reduction:.2}%",
+ "{}: compacted {} blocks to {} blocks ({}% reduction)",
+ self.nsid,
+ start_blocks_size,
+ end_blocks_size,
+ ((start_blocks_size - end_blocks_size) as f64 / start_blocks_size as f64) * 100.0,
);
Ok(())
- }
-
- pub fn insert_block(&self, block: Block) -> AppResult<()> {
- self.write_tree
- .insert(block.key, block.data)
- .map_err(AppError::from)
}
pub fn encode_block_from_items(
+82 -139
server/src/db/mod.rs
···
use std::{
+ collections::HashMap,
fmt::Debug,
io::Cursor,
ops::{Bound, Deref, RangeBounds},
- path::Path,
+ path::{Path, PathBuf},
time::Duration,
- u64,
};
- use ahash::{AHashMap, AHashSet};
use byteview::StrView;
- use fjall::{Keyspace, Partition, PartitionCreateOptions};
+ use fjall::{Config, Keyspace, Partition, PartitionCreateOptions};
use itertools::{Either, Itertools};
- use rayon::iter::{IntoParallelIterator, ParallelIterator};
+ use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use rclite::Arc;
use rkyv::{Archive, Deserialize, Serialize, rancor::Error};
use smol_str::{SmolStr, ToSmolStr};
···
db::handle::{ItemDecoder, LexiconHandle},
error::{AppError, AppResult},
jetstream::JetstreamEvent,
- utils::{CLOCK, RateTracker, ReadVariableExt, varints_unsigned_encoded},
+ utils::{RateTracker, ReadVariableExt, varints_unsigned_encoded},
};
mod block;
···
}
pub struct DbInfo {
- pub nsids: AHashMap<SmolStr, Vec<usize>>,
+ pub nsids: HashMap<SmolStr, Vec<usize>>,
pub disk_size: u64,
}
···
pub ks_config: fjall::Config,
pub min_block_size: usize,
pub max_block_size: usize,
- pub max_last_activity: Duration,
+ pub max_last_activity: u64,
}
impl DbConfig {
···
impl Default for DbConfig {
fn default() -> Self {
Self {
- ks_config: fjall::Config::default()
- .cache_size(1024 * 1024 * 512)
- .max_write_buffer_size(u64::MAX),
- min_block_size: 1000,
- max_block_size: 250_000,
- max_last_activity: Duration::from_secs(10),
+ ks_config: fjall::Config::default(),
+ min_block_size: 512,
+ max_block_size: 500_000,
+ max_last_activity: Duration::from_secs(10).as_nanos() as u64,
}
}
}
···
pub cfg: DbConfig,
pub ks: Keyspace,
counts: Partition,
- hits: scc::HashIndex<SmolStr, Arc<LexiconHandle>, ahash::RandomState>,
+ hits: scc::HashIndex<SmolStr, Arc<LexiconHandle>>,
sync_pool: threadpool::ThreadPool,
event_broadcaster: broadcast::Sender<(SmolStr, NsidCounts)>,
- eps: RateTracker<100>, // 100 millis buckets
+ eps: RateTracker<100>,
cancel_token: CancellationToken,
}
···
}
pub fn sync(&self, all: bool) -> AppResult<()> {
- let start = CLOCK.now();
// prepare all the data
- let nsids_len = self.hits.len();
- let mut data = Vec::with_capacity(nsids_len);
- let mut nsids = AHashSet::with_capacity(nsids_len);
+ let mut data = Vec::with_capacity(self.hits.len());
let _guard = scc::ebr::Guard::new();
- for (nsid, handle) in self.hits.iter(&_guard) {
+ for (_, handle) in self.hits.iter(&_guard) {
let mut nsid_data = Vec::with_capacity(2);
- // let mut total_count = 0;
+ let mut total_count = 0;
let is_too_old = handle.since_last_activity() > self.cfg.max_last_activity;
// if we disconnect for a long time, we want to sync all of what we
// have to avoid having many small blocks (even if we run compaction
···
let count = handle.item_count();
let data_count = count / block_size;
if count > 0 && (all || data_count > 0 || is_too_old) {
- for _ in 0..data_count {
- nsid_data.push((handle.clone(), block_size));
- // total_count += block_size;
+ for i in 0..data_count {
+ nsid_data.push((i, handle.clone(), block_size));
+ total_count += block_size;
}
// only sync remainder if we haven't met block size
let remainder = count % block_size;
if (all || data_count == 0) && remainder > 0 {
- nsid_data.push((handle.clone(), remainder));
- // total_count += remainder;
+ nsid_data.push((data_count, handle.clone(), remainder));
+ total_count += remainder;
}
}
- let _span = handle.span().entered();
- if nsid_data.len() > 0 {
- // tracing::info!(
- //     {blocks = %nsid_data.len(), count = %total_count},
- //     "will encode & sync",
- // );
- nsids.insert(nsid.clone());
- data.push(nsid_data);
- }
+ tracing::info!(
+ "{}: will sync {} blocks ({} count)",
+ handle.nsid(),
+ nsid_data.len(),
+ total_count,
+ );
+ data.push(nsid_data);
}
drop(_guard);
···
.map(|chunk| {
chunk
.into_iter()
- .map(|(handle, max_block_size)| {
- (handle.take_block_items(max_block_size), handle)
+ .map(|(i, handle, max_block_size)| {
+ (i, handle.take_block_items(max_block_size), handle)
})
.collect::<Vec<_>>()
.into_par_iter()
- .map(|(items, handle)| {
+ .map(|(i, items, handle)| {
let count = items.len();
let block = LexiconHandle::encode_block_from_items(items, count)?;
- AppResult::Ok((block, handle))
+ tracing::info!(
+ "{}: encoded block with {} items",
+ handle.nsid(),
+ block.written,
+ );
+ AppResult::Ok((i, block, handle))
})
.collect::<Result<Vec<_>, _>>()
})
.try_for_each(|chunk| {
let chunk = chunk?;
- for (block, handle) in chunk {
- self.sync_pool.execute(move || {
- let _span = handle.span().entered();
- let written = block.written;
- match handle.insert_block(block) {
+ for (i, block, handle) in chunk {
+ self.sync_pool
+ .execute(move || match handle.insert(block.key, block.data) {
Ok(_) => {
- tracing::info!({count = %written}, "synced")
+ tracing::info!("{}: [{i}] synced {}", block.written, handle.nsid())
}
- Err(err) => tracing::error!({ err = %err }, "failed to sync block"),
- }
- });
+ Err(err) => tracing::error!("failed to sync block: {}", err),
+ });
}
AppResult::Ok(())
})?;
self.sync_pool.join();
- // update snapshots for all (changed) handles
- for nsid in nsids {
- self.hits.peek_with(&nsid, |_, handle| handle.update_tree());
- }
-
- tracing::info!(time = %start.elapsed().as_secs_f64(), "synced all blocks");
-
Ok(())
}
···
let Some(handle) = self.get_handle(nsid) else {
return Ok(());
};
- handle.compact(max_count, range, sort)?;
- handle.update_tree();
- Ok(())
+ handle.compact(max_count, range, sort)
}
pub fn compact_all(
···
pub fn major_compact(&self) -> AppResult<()> {
self.compact_all(self.cfg.max_block_size, .., true)?;
+ let _guard = scc::ebr::Guard::new();
+ for (_, handle) in self.hits.iter(&_guard) {
+ handle.deref().major_compact()?;
+ }
Ok(())
}
···
}
pub fn ingest_events(&self, events: impl Iterator<Item = EventRecord>) -> AppResult<()> {
- let mut seen_events = 0;
for (key, chunk) in events.chunk_by(|event| event.nsid.clone()).into_iter() {
let mut counts = self.get_count(&key)?;
+ let mut count = 0;
self.ensure_handle(&key).queue(chunk.inspect(|e| {
// increment count
counts.last_seen = e.timestamp;
···
} else {
counts.count += 1;
}
- seen_events += 1;
+ count += 1;
}));
+ self.eps.observe(count);
self.insert_count(&key, &counts)?;
if self.event_broadcaster.receiver_count() > 0 {
let _ = self.event_broadcaster.send((key, counts));
}
}
- self.eps.observe(seen_events);
Ok(())
}
···
}
pub fn info(&self) -> AppResult<DbInfo> {
- let mut nsids = AHashMap::new();
+ let mut nsids = HashMap::new();
for nsid in self.get_nsids() {
let Some(handle) = self.get_handle(&nsid) else {
continue;
};
- let block_lens = handle
- .read()
- .iter()
- .rev()
- .try_fold(Vec::new(), |mut acc, item| {
- let (key, value) = item?;
- let mut timestamps = Cursor::new(key);
- let start_timestamp = timestamps.read_varint()?;
- let decoder = ItemDecoder::new(Cursor::new(value), start_timestamp)?;
- acc.push(decoder.item_count());
- AppResult::Ok(acc)
- })?;
+ let block_lens = handle.iter().rev().try_fold(Vec::new(), |mut acc, item| {
+ let (key, value) = item?;
+ let mut timestamps = Cursor::new(key);
+ let start_timestamp = timestamps.read_varint()?;
+ let decoder = ItemDecoder::new(Cursor::new(value), start_timestamp)?;
+ acc.push(decoder.item_count());
+ AppResult::Ok(acc)
+ })?;
nsids.insert(nsid.to_smolstr(), block_lens);
}
Ok(DbInfo {
···
&self,
nsid: &str,
range: impl RangeBounds<u64> + std::fmt::Debug,
- max_items: usize,
) -> impl Iterator<Item = AppResult<handle::Item>> {
let start_limit = match range.start_bound().cloned() {
Bound::Included(start) => start,
···
return Either::Right(std::iter::empty());
};
- // let mut ts = CLOCK.now();
- let map_block = move |(res, current_item_count)| -> AppResult<(Option<_>, usize)> {
- if current_item_count >= max_items {
- return Ok((None, current_item_count));
- }
- let (key, val) = res?;
+ let map_block = move |(key, val)| {
let mut key_reader = Cursor::new(key);
let start_timestamp = key_reader.read_varint::<u64>()?;
- // let end_timestamp = key_reader.read_varint::<u64>()?;
if start_timestamp < start_limit {
- // tracing::info!(
- //     "stopped at block with timestamps {start_timestamp}..{end_timestamp} because {start_limit} is greater"
- // );
- return Ok((None, current_item_count));
+ return Ok(None);
}
- let decoder = handle::ItemDecoder::new(Cursor::new(val), start_timestamp)?;
- let current_item_count = current_item_count + decoder.item_count();
- // tracing::info!(
- //     "took {}ns to get block with size {}",
- //     ts.elapsed().as_nanos(),
- //     decoder.item_count()
- // );
- // ts = CLOCK.now();
- Ok((
- Some(
- decoder
- .take_while(move |item| {
- item.as_ref().map_or(true, |item| {
- item.timestamp <= end_limit && item.timestamp >= start_limit
- })
- })
- .map(|res| res.map_err(AppError::from)),
- ),
- current_item_count,
- ))
+ let items = handle::ItemDecoder::new(Cursor::new(val), start_timestamp)?
+ .take_while(move |item| {
+ item.as_ref().map_or(true, |item| {
+ item.timestamp <= end_limit && item.timestamp >= start_limit
+ })
+ })
+ .map(|res| res.map_err(AppError::from));
+ Ok(Some(items))
};
- let (blocks, _counted) = handle
- .read()
- .range(..end_key)
- .map(|res| res.map_err(AppError::from))
- .rev()
- .fold_while(
- (Vec::with_capacity(20), 0),
- |(mut blocks, current_item_count), res| {
- use itertools::FoldWhile::*;
-
- match map_block((res, current_item_count)) {
- Ok((Some(block), current_item_count)) => {
- blocks.push(Ok(block));
- Continue((blocks, current_item_count))
- }
- Ok((None, current_item_count)) => Done((blocks, current_item_count)),
- Err(err) => {
- blocks.push(Err(err));
- Done((blocks, current_item_count))
- }
- }
- },
- )
- .into_inner();
-
- // tracing::info!(
- //     "got blocks with size {}, item count {counted}",
- //     blocks.len()
- // );
-
- Either::Left(blocks.into_iter().rev().flatten().flatten())
+ Either::Left(
+ handle
+ .range(..end_key)
+ .rev()
+ .map_while(move |res| res.map_err(AppError::from).and_then(map_block).transpose())
+ .collect::<Vec<_>>()
+ .into_iter()
+ .rev()
+ .flatten()
+ .flatten(),
+ )
}
pub fn tracking_since(&self) -> AppResult<u64> {
···
let Some(handle) = self.get_handle("app.bsky.feed.like") else {
return Ok(0);
};
- let Some((timestamps_raw, _)) = handle.read().first_key_value()? else {
+ let Some((timestamps_raw, _)) = handle.first_key_value()? else {
return Ok(0);
};
let mut timestamp_reader = Cursor::new(timestamps_raw);
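a note on the key layout these range reads rely on: each hit-block key is `varint(start_timestamp) ++ varint(end_timestamp)` (the db_old sync below writes it explicitly, and `read_varint`/`varints_unsigned_encoded` above parse and build it), and `ordered_varint` encodes integers so byte order matches numeric order — which is what lets `handle.range(..end_key)` act as a timestamp range query. a minimal sketch of that layout (helper names here are illustrative, not from the repo):

```rust
// Illustrative sketch of the hit-block key layout: varint(start_ts) ++ varint(end_ts),
// using the same ordered_varint calls that Read/WriteVariableExt wrap.
use std::io::Cursor;
use ordered_varint::Variable;

fn block_key(start_ts: u64, end_ts: u64) -> std::io::Result<Vec<u8>> {
    let mut key = Vec::new();
    start_ts.encode_variable(&mut key)?; // prefix: first timestamp in the block
    end_ts.encode_variable(&mut key)?; // suffix: last timestamp in the block
    Ok(key)
}

fn block_bounds(key: &[u8]) -> std::io::Result<(u64, u64)> {
    let mut reader = Cursor::new(key);
    Ok((
        u64::decode_variable(&mut reader)?, // start timestamp
        u64::decode_variable(&mut reader)?, // end timestamp
    ))
}
```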
+501
server/src/db_old/block.rs
···
+ use ordered_varint::Variable;
+ use rkyv::{
+ Archive, Deserialize, Serialize,
+ api::high::{HighSerializer, HighValidator},
+ bytecheck::CheckBytes,
+ de::Pool,
+ rancor::{self, Strategy},
+ ser::allocator::ArenaHandle,
+ util::AlignedVec,
+ };
+ use std::{
+ io::{self, Read, Write},
+ marker::PhantomData,
+ };
+
+ use crate::error::{AppError, AppResult};
+
+ pub struct Item<T> {
+ pub timestamp: u64,
+ data: AlignedVec,
+ phantom: PhantomData<T>,
+ }
+
+ impl<T: Archive> Item<T> {
+ pub fn access(&self) -> &T::Archived {
+ unsafe { rkyv::access_unchecked::<T::Archived>(&self.data) }
+ }
+ }
+
+ impl<T> Item<T>
+ where
+ T: Archive,
+ T::Archived: for<'a> CheckBytes<HighValidator<'a, rancor::Error>>
+ + Deserialize<T, Strategy<Pool, rancor::Error>>,
+ {
+ pub fn deser(&self) -> AppResult<T> {
+ rkyv::from_bytes(&self.data).map_err(AppError::from)
+ }
+ }
+
+ impl<T: for<'a> Serialize<HighSerializer<AlignedVec, ArenaHandle<'a>, rancor::Error>>> Item<T> {
+ pub fn new(timestamp: u64, data: &T) -> Self {
+ Item {
+ timestamp,
+ data: unsafe { rkyv::to_bytes(data).unwrap_unchecked() },
+ phantom: PhantomData,
+ }
+ }
+ }
+
+ pub struct ItemEncoder<W: Write, T> {
+ writer: W,
+ prev_timestamp: u64,
+ prev_delta: i64,
+ _item: PhantomData<T>,
+ }
+
+ impl<W: Write, T> ItemEncoder<W, T> {
+ pub fn new(writer: W) -> Self {
+ ItemEncoder {
+ writer,
+ prev_timestamp: 0,
+ prev_delta: 0,
+ _item: PhantomData,
+ }
+ }
+
+ pub fn encode(&mut self, item: &Item<T>) -> AppResult<()> {
+ if self.prev_timestamp == 0 {
+ // self.writer.write_varint(item.timestamp)?;
+ self.prev_timestamp = item.timestamp;
+ self.write_data(&item.data)?;
+ return Ok(());
+ }
+
+ let delta = (item.timestamp as i128 - self.prev_timestamp as i128) as i64;
+
+ self.writer.write_varint(delta - self.prev_delta)?;
+ self.prev_timestamp = item.timestamp;
+ self.prev_delta = delta;
+
+ self.write_data(&item.data)?;
+
+ Ok(())
+ }
+
+ fn write_data(&mut self, data: &[u8]) -> AppResult<()> {
+ self.writer.write_varint(data.len())?;
+ self.writer.write_all(data)?;
+ Ok(())
+ }
+
+ pub fn finish(mut self) -> AppResult<W> {
+ self.writer.flush()?;
+ Ok(self.writer)
+ }
+ }
+
+ pub struct ItemDecoder<R, T> {
+ reader: R,
+ current_timestamp: u64,
+ current_delta: i64,
+ first_item: bool,
+ _item: PhantomData<T>,
+ }
+
+ impl<R: Read, T: Archive> ItemDecoder<R, T> {
+ pub fn new(reader: R, start_timestamp: u64) -> AppResult<Self> {
+ Ok(ItemDecoder {
+ reader,
+ current_timestamp: start_timestamp,
+ current_delta: 0,
+ first_item: true,
+ _item: PhantomData,
+ })
+ }
+
+ pub fn decode(&mut self) -> AppResult<Option<Item<T>>> {
+ if self.first_item {
+ // read the first timestamp
+ // let timestamp = match self.reader.read_varint::<u64>() {
+ //     Ok(timestamp) => timestamp,
+ //     Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
+ //     Err(e) => return Err(e.into()),
+ // };
+ // self.current_timestamp = timestamp;
+
+ let Some(data_raw) = self.read_item()? else {
+ return Ok(None);
+ };
+ self.first_item = false;
+ return Ok(Some(Item {
+ timestamp: self.current_timestamp,
+ data: data_raw,
+ phantom: PhantomData,
+ }));
+ }
+
+ let Some(_delta) = self.read_timestamp()? else {
+ return Ok(None);
+ };
+
+ // read data
+ let data_raw = match self.read_item()? {
+ Some(data_raw) => data_raw,
+ None => {
+ return Err(io::Error::new(
+ io::ErrorKind::UnexpectedEof,
+ "expected data after delta",
+ )
+ .into());
+ }
+ };
+
+ Ok(Some(Item {
+ timestamp: self.current_timestamp,
+ data: data_raw,
+ phantom: PhantomData,
+ }))
+ }
+
+ // [10, 11, 12, 14] -> [1, 1, 2] -> [0, 1]
+ fn read_timestamp(&mut self) -> AppResult<Option<u64>> {
+ let delta = match self.reader.read_varint::<i64>() {
+ Ok(delta) => delta,
+ Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
+ Err(e) => return Err(e.into()),
+ };
+ self.current_delta += delta;
+ self.current_timestamp =
+ (self.current_timestamp as i128 + self.current_delta as i128) as u64;
+ Ok(Some(self.current_timestamp))
+ }
+
+ fn read_item(&mut self) -> AppResult<Option<AlignedVec>> {
+ let data_len = match self.reader.read_varint::<usize>() {
+ Ok(data_len) => data_len,
+ Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
+ Err(e) => return Err(e.into()),
+ };
+ let mut data_raw = AlignedVec::with_capacity(data_len);
+ for _ in 0..data_len {
+ data_raw.push(0);
+ }
+ self.reader.read_exact(data_raw.as_mut_slice())?;
+ Ok(Some(data_raw))
+ }
+ }
+
+ impl<R: Read, T: Archive> Iterator for ItemDecoder<R, T> {
+ type Item = AppResult<Item<T>>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.decode().transpose()
+ }
+ }
+
+ pub trait WriteVariableExt: Write {
+ fn write_varint(&mut self, value: impl Variable) -> io::Result<usize> {
+ value.encode_variable(self)
+ }
+ }
+ impl<W: Write> WriteVariableExt for W {}
+
+ pub trait ReadVariableExt: Read {
+ fn read_varint<T: Variable>(&mut self) -> io::Result<T> {
+ T::decode_variable(self)
+ }
+ }
+ impl<R: Read> ReadVariableExt for R {}
+
+ #[cfg(test)]
+ mod test {
+ use super::*;
+ use rkyv::{Archive, Deserialize, Serialize};
+ use std::io::Cursor;
+
+ #[derive(Archive, Deserialize, Serialize, Debug, PartialEq)]
+ #[rkyv(compare(PartialEq))]
+ struct TestData {
+ id: u32,
+ value: String,
+ }
+
+ #[test]
+ fn test_encoder_decoder_single_item() {
+ let data = TestData {
+ id: 123,
+ value: "test".to_string(),
+ };
+
+ let item = Item::new(1000, &data);
+
+ // encode
+ let mut buffer = Vec::new();
+ let mut encoder = ItemEncoder::new(&mut buffer);
+ encoder.encode(&item).unwrap();
+ encoder.finish().unwrap();
+
+ // decode
+ let cursor = Cursor::new(buffer);
+ let mut decoder = ItemDecoder::<_, TestData>::new(cursor, 1000).unwrap();
+
+ let decoded_item = decoder.decode().unwrap().unwrap();
+ assert_eq!(decoded_item.timestamp, 1000);
+
+ let decoded_data = decoded_item.access();
+ assert_eq!(decoded_data.id, 123);
+ assert_eq!(decoded_data.value.as_str(), "test");
+ }
+
+ #[test]
+ fn test_encoder_decoder_multiple_items() {
+ let items = vec![
+ Item::new(
+ 1000,
+ &TestData {
+ id: 1,
+ value: "first".to_string(),
+ },
+ ),
+ Item::new(
+ 1010,
+ &TestData {
+ id: 2,
+ value: "second".to_string(),
+ },
+ ),
+ Item::new(
+ 1015,
+ &TestData {
+ id: 3,
+ value: "third".to_string(),
+ },
+ ),
+ Item::new(
+ 1025,
+ &TestData {
+ id: 4,
+ value: "fourth".to_string(),
+ },
+ ),
+ ];
+
+ // encode
+ let mut buffer = Vec::new();
+ let mut encoder = ItemEncoder::new(&mut buffer);
+
+ for item in &items {
+ encoder.encode(item).unwrap();
+ }
+ encoder.finish().unwrap();
+
+ // decode
+ let cursor = Cursor::new(buffer);
+ let mut decoder = ItemDecoder::<_, TestData>::new(cursor, 1000).unwrap();
+
+ let mut decoded_items = Vec::new();
+ while let Some(item) = decoder.decode().unwrap() {
+ decoded_items.push(item);
+ }
+
+ assert_eq!(decoded_items.len(), 4);
+
+ for (original, decoded) in items.iter().zip(decoded_items.iter()) {
+ assert_eq!(original.timestamp, decoded.timestamp);
+ assert_eq!(original.access().id, decoded.access().id);
+ assert_eq!(
+ original.access().value.as_str(),
+ decoded.access().value.as_str()
+ );
+ }
+ }
+
+ #[test]
+ fn test_encoder_decoder_with_iterator() {
+ let items = vec![
+ Item::new(
+ 2000,
+ &TestData {
+ id: 10,
+ value: "a".to_string(),
+ },
+ ),
+ Item::new(
+ 2005,
+ &TestData {
+ id: 20,
+ value: "b".to_string(),
+ },
+ ),
+ Item::new(
+ 2012,
+ &TestData {
+ id: 30,
+ value: "c".to_string(),
+ },
+ ),
+ ];
+
+ // encode
+ let mut buffer = Vec::new();
+ let mut encoder = ItemEncoder::new(&mut buffer);
+
+ for item in &items {
+ encoder.encode(item).unwrap();
+ }
+ encoder.finish().unwrap();
+
+ // decode
+ let cursor = Cursor::new(buffer);
+ let decoder = ItemDecoder::<_, TestData>::new(cursor, 2000).unwrap();
+
+ let decoded_items: Result<Vec<_>, _> = decoder.collect();
+ let decoded_items = decoded_items.unwrap();
+
+ assert_eq!(decoded_items.len(), 3);
+ assert_eq!(decoded_items[0].timestamp, 2000);
+ assert_eq!(decoded_items[1].timestamp, 2005);
+ assert_eq!(decoded_items[2].timestamp, 2012);
+
+ assert_eq!(decoded_items[0].access().id, 10);
+ assert_eq!(decoded_items[1].access().id, 20);
+ assert_eq!(decoded_items[2].access().id, 30);
+ }
+
+ #[test]
+ fn test_delta_compression() {
+ let items = vec![
+ Item::new(
+ 1000,
+ &TestData {
+ id: 1,
+ value: "a".to_string(),
+ },
+ ),
+ Item::new(
+ 1010,
+ &TestData {
+ id: 2,
+ value: "b".to_string(),
+ },
+ ), // delta = 10
+ Item::new(
+ 1020,
+ &TestData {
+ id: 3,
+ value: "c".to_string(),
+ },
+ ), // delta = 10, delta-of-delta = 0
+ Item::new(
+ 1025,
+ &TestData {
+ id: 4,
+ value: "d".to_string(),
+ },
+ ), // delta = 5, delta-of-delta = -5
+ ];
+
+ let mut buffer = Vec::new();
+ let mut encoder = ItemEncoder::new(&mut buffer);
+
+ for item in &items {
+ encoder.encode(item).unwrap();
+ }
+ encoder.finish().unwrap();
+
+ // decode and verify
+ let cursor = Cursor::new(buffer);
+ let decoder = ItemDecoder::<_, TestData>::new(cursor, 1000).unwrap();
+
+ let decoded_items: Result<Vec<_>, _> = decoder.collect();
+ let decoded_items = decoded_items.unwrap();
+
+ for (original, decoded) in items.iter().zip(decoded_items.iter()) {
+ assert_eq!(original.timestamp, decoded.timestamp);
+ assert_eq!(original.access().id, decoded.access().id);
+ }
+ }
+
+ #[test]
+ fn test_empty_decode() {
+ let buffer = Vec::new();
+ let cursor = Cursor::new(buffer);
+ let mut decoder = ItemDecoder::<_, TestData>::new(cursor, 1000).unwrap();
+
+ let result = decoder.decode().unwrap();
+ assert!(result.is_none());
+ }
+
+ #[test]
+ fn test_backwards_timestamp() {
+ let items = vec![
+ Item::new(
+ 1000,
+ &TestData {
+ id: 1,
+ value: "first".to_string(),
+ },
+ ),
+ Item::new(
+ 900,
+ &TestData {
+ id: 2,
+ value: "second".to_string(),
+ },
+ ),
+ ];
+
+ let mut buffer = Vec::new();
+ let mut encoder = ItemEncoder::new(&mut buffer);
+
+ for item in &items {
+ encoder.encode(item).unwrap();
+ }
+ encoder.finish().unwrap();
+
+ let cursor = Cursor::new(buffer);
+ let decoder = ItemDecoder::<_, TestData>::new(cursor, 1000).unwrap();
+
+ let decoded_items: Result<Vec<_>, _> = decoder.collect();
+ let decoded_items = decoded_items.unwrap();
+
+ assert_eq!(decoded_items.len(), 2);
+ assert_eq!(decoded_items[0].timestamp, 1000);
+ assert_eq!(decoded_items[1].timestamp, 900);
+ }
+
+ #[test]
+ fn test_different_data_sizes() {
+ let small_data = TestData {
+ id: 1,
+ value: "x".to_string(),
+ };
+ let large_data = TestData {
+ id: 2,
+ value: "a".repeat(1000),
+ };
+
+ let items = vec![Item::new(1000, &small_data), Item::new(1001, &large_data)];
+
+ let mut buffer = Vec::new();
+ let mut encoder = ItemEncoder::new(&mut buffer);
+
+ for item in &items {
+ encoder.encode(item).unwrap();
+ }
+ encoder.finish().unwrap();
+
+ let cursor = Cursor::new(buffer);
+ let decoder = ItemDecoder::<_, TestData>::new(cursor, 1000).unwrap();
+
+ let decoded_items: Result<Vec<_>, _> = decoder.collect();
+ let decoded_items = decoded_items.unwrap();
+
+ assert_eq!(decoded_items.len(), 2);
+ assert_eq!(decoded_items[0].access().value.as_str(), "x");
+ assert_eq!(decoded_items[1].access().value.len(), 1000);
+ assert_eq!(decoded_items[1].access().value.as_str(), "a".repeat(1000));
+ }
+ }
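to make the delta-of-delta scheme above concrete, here is a trace of what `ItemEncoder::encode` hands to `write_varint` for the timestamps used in `test_delta_compression` (a sketch derived from the code above; the block's first timestamp travels in the partition key, so only second-order deltas reach the stream):

```rust
// For each timestamp after the first, encode() writes (delta - prev_delta).
#[test]
fn second_order_delta_trace() {
    let ts = [1000u64, 1010, 1020, 1025];
    let mut prev_delta = 0i64;
    let mut stored = Vec::new();
    for w in ts.windows(2) {
        let delta = (w[1] as i128 - w[0] as i128) as i64;
        stored.push(delta - prev_delta); // the varint that hits the stream
        prev_delta = delta;
    }
    // deltas are [10, 10, 5]; their second-order deltas get varint-encoded
    assert_eq!(stored, vec![10, 0, -5]);
}
```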
+424
server/src/db_old/mod.rs
···
+
use std::{
+
io::Cursor,
+
ops::{Bound, Deref, RangeBounds},
+
path::Path,
+
sync::{
+
Arc,
+
atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering},
+
},
+
time::{Duration, Instant},
+
};
+
+
use fjall::{Config, Keyspace, Partition, PartitionCreateOptions, Slice};
+
use ordered_varint::Variable;
+
use rkyv::{Archive, Deserialize, Serialize, rancor::Error};
+
use smol_str::SmolStr;
+
use tokio::sync::broadcast;
+
+
use crate::{
+
db_old::block::{ReadVariableExt, WriteVariableExt},
+
error::{AppError, AppResult},
+
jetstream::JetstreamEvent,
+
utils::{DefaultRateTracker, get_time},
+
};
+
+
mod block;
+
+
#[derive(Clone, Debug, Default, Archive, Deserialize, Serialize, PartialEq)]
+
#[rkyv(compare(PartialEq), derive(Debug))]
+
pub struct NsidCounts {
+
pub count: u128,
+
pub deleted_count: u128,
+
pub last_seen: u64,
+
}
+
+
#[derive(Debug, Default, Archive, Deserialize, Serialize, PartialEq)]
+
#[rkyv(compare(PartialEq), derive(Debug))]
+
pub struct NsidHit {
+
pub deleted: bool,
+
}
+
+
#[derive(Clone)]
+
pub struct EventRecord {
+
pub nsid: SmolStr,
+
pub timestamp: u64, // seconds
+
pub deleted: bool,
+
}
+
+
impl EventRecord {
+
pub fn from_jetstream(event: JetstreamEvent) -> Option<Self> {
+
match event {
+
JetstreamEvent::Commit {
+
time_us, commit, ..
+
} => Some(Self {
+
nsid: commit.collection.into(),
+
timestamp: time_us / 1_000_000,
+
deleted: false,
+
}),
+
JetstreamEvent::Delete {
+
time_us, commit, ..
+
} => Some(Self {
+
nsid: commit.collection.into(),
+
timestamp: time_us / 1_000_000,
+
deleted: true,
+
}),
+
_ => None,
+
}
+
}
+
}
+
+
type ItemDecoder = block::ItemDecoder<Cursor<Slice>, NsidHit>;
+
type ItemEncoder = block::ItemEncoder<Vec<u8>, NsidHit>;
+
type Item = block::Item<NsidHit>;
+
+
pub struct LexiconHandle {
+
tree: Partition,
+
buf: Arc<scc::Queue<EventRecord>>,
+
buf_len: AtomicUsize,
+
last_insert: AtomicU64,
+
eps: DefaultRateTracker,
+
block_size: AtomicUsize,
+
}
+
+
impl LexiconHandle {
+
fn new(keyspace: &Keyspace, nsid: &str) -> Self {
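+
// miniz level 9 is the heaviest setting, presumably trading write-time CPU for disk footprint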
+
let opts = PartitionCreateOptions::default().compression(fjall::CompressionType::Miniz(9));
+
Self {
+
tree: keyspace.open_partition(nsid, opts).unwrap(),
+
buf: Default::default(),
+
buf_len: AtomicUsize::new(0),
+
last_insert: AtomicU64::new(0),
+
eps: DefaultRateTracker::new(Duration::from_secs(5)),
+
block_size: AtomicUsize::new(1000),
+
}
+
}
+
+
fn item_count(&self) -> usize {
+
self.buf_len.load(AtomicOrdering::Acquire)
+
}
+
+
fn last_insert(&self) -> u64 {
+
self.last_insert.load(AtomicOrdering::Acquire)
+
}
+
+
fn suggested_block_size(&self) -> usize {
+
self.block_size.load(AtomicOrdering::Relaxed)
+
}
+
+
fn insert(&self, event: EventRecord) {
+
self.buf.push(event);
+
self.buf_len.fetch_add(1, AtomicOrdering::Release);
+
self.last_insert
+
.store(get_time().as_millis() as u64, AtomicOrdering::Release);
+
self.eps.observe(1);
+
let rate = self.eps.rate() as usize;
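+
// size blocks to roughly one minute of events at the observed rate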
+
if rate != 0 {
+
self.block_size.store(rate * 60, AtomicOrdering::Relaxed);
+
}
+
}
+
+
fn sync(&self, max_block_size: usize) -> AppResult<usize> {
+
let mut writer = ItemEncoder::new(Vec::with_capacity(
+
size_of::<u64>() + self.item_count().min(max_block_size) * size_of::<(u64, NsidHit)>(),
+
));
+
let mut start_timestamp = None;
+
let mut end_timestamp = None;
+
let mut written = 0_usize;
+
while let Some(event) = self.buf.pop() {
+
let item = Item::new(
+
event.timestamp,
+
&NsidHit {
+
deleted: event.deleted,
+
},
+
);
+
writer.encode(&item)?;
+
if start_timestamp.is_none() {
+
start_timestamp = Some(event.timestamp);
+
}
+
end_timestamp = Some(event.timestamp);
+
// bump the count before the bound check so a block never exceeds max_block_size items
+
written += 1;
+
if written >= max_block_size {
+
break;
+
}
+
}
+
if let (Some(start_timestamp), Some(end_timestamp)) = (start_timestamp, end_timestamp) {
+
// subtract what was drained instead of storing 0, which would drop
+
// events pushed concurrently or left over after an early break
+
self.buf_len.fetch_sub(written, AtomicOrdering::Release);
+
let value = writer.finish()?;
+
let mut key = Vec::with_capacity(size_of::<u64>() * 2);
+
key.write_varint(start_timestamp)?;
+
key.write_varint(end_timestamp)?;
+
self.tree.insert(key, value)?;
+
}
+
Ok(written)
+
}
+
}
+
+
type BoxedIter<T> = Box<dyn Iterator<Item = T>>;
+
+
// counts is nsid -> NsidCounts
+
// hits is tree per nsid: varint start time + varint end time -> block of hits
+
pub struct Db {
+
inner: Keyspace,
+
hits: scc::HashIndex<SmolStr, Arc<LexiconHandle>>,
+
counts: Partition,
+
event_broadcaster: broadcast::Sender<(SmolStr, NsidCounts)>,
+
eps: DefaultRateTracker,
+
min_block_size: usize,
+
max_block_size: usize,
+
max_last_activity: Duration,
+
}
+
+
impl Db {
+
pub fn new(path: impl AsRef<Path>) -> AppResult<Self> {
+
tracing::info!("opening db...");
+
let ks = Config::new(path)
+
.cache_size(8 * 1024 * 1024) // from talna
+
.open()?;
+
Ok(Self {
+
hits: Default::default(),
+
counts: ks.open_partition(
+
"_counts",
+
PartitionCreateOptions::default().compression(fjall::CompressionType::None),
+
)?,
+
inner: ks,
+
event_broadcaster: broadcast::channel(1000).0,
+
eps: DefaultRateTracker::new(Duration::from_secs(1)),
+
min_block_size: 512,
+
max_block_size: 100_000,
+
max_last_activity: Duration::from_secs(10),
+
})
+
}
+
+
pub fn sync(&self, all: bool) -> AppResult<()> {
+
let _guard = scc::ebr::Guard::new();
+
for (nsid, tree) in self.hits.iter(&_guard) {
+
let count = tree.item_count();
+
let is_over_block_size = count > self.min_block_size.max(tree.suggested_block_size());
+
let is_too_old = (get_time().as_millis() as u64 - tree.last_insert())
+
> self.max_last_activity.as_millis() as u64;
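+
// flush when forced, when the buffer outgrows the adaptive block size, or when the nsid has gone idle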
+
if count > 0 && (all || is_over_block_size || is_too_old) {
+
loop {
+
let synced = tree.sync(self.max_block_size)?;
+
if synced == 0 {
+
break;
+
}
+
tracing::info!("synced {synced} of {nsid} to db");
+
}
+
}
+
}
+
Ok(())
+
}
+
+
#[inline(always)]
+
pub fn eps(&self) -> usize {
+
self.eps.rate() as usize
+
}
+
+
#[inline(always)]
+
pub fn new_listener(&self) -> broadcast::Receiver<(SmolStr, NsidCounts)> {
+
self.event_broadcaster.subscribe()
+
}
+
+
#[inline(always)]
+
fn maybe_run_in_nsid_tree<T>(
+
&self,
+
nsid: &str,
+
f: impl FnOnce(&LexiconHandle) -> T,
+
) -> Option<T> {
+
let _guard = scc::ebr::Guard::new();
+
let handle = match self.hits.peek(nsid, &_guard) {
+
Some(handle) => handle.clone(),
+
None => {
+
if self.inner.partition_exists(nsid) {
+
let handle = Arc::new(LexiconHandle::new(&self.inner, nsid));
+
let _ = self.hits.insert(SmolStr::new(nsid), handle.clone());
+
handle
+
} else {
+
return None;
+
}
+
}
+
};
+
Some(f(&handle))
+
}
+
+
#[inline(always)]
+
fn run_in_nsid_tree<T>(
+
&self,
+
nsid: SmolStr,
+
f: impl FnOnce(&LexiconHandle) -> AppResult<T>,
+
) -> AppResult<T> {
+
f(self
+
.hits
+
.entry(nsid.clone())
+
.or_insert_with(move || Arc::new(LexiconHandle::new(&self.inner, &nsid)))
+
.get())
+
}
+
+
pub fn record_event(&self, e: EventRecord) -> AppResult<()> {
+
let EventRecord {
+
nsid,
+
timestamp,
+
deleted,
+
} = e.clone();
+
+
// insert event
+
self.run_in_nsid_tree(nsid.clone(), move |tree| Ok(tree.insert(e)))?;
+
// increment count
+
let mut counts = self.get_count(&nsid)?;
+
counts.last_seen = timestamp;
+
if deleted {
+
counts.deleted_count += 1;
+
} else {
+
counts.count += 1;
+
}
+
self.insert_count(&nsid, counts.clone())?;
+
if self.event_broadcaster.receiver_count() > 0 {
+
let _ = self.event_broadcaster.send((SmolStr::new(&nsid), counts));
+
}
+
self.eps.observe(1);
+
Ok(())
+
}
+
+
#[inline(always)]
+
fn insert_count(&self, nsid: &str, counts: NsidCounts) -> AppResult<()> {
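+
// serializing a plain NsidCounts cannot fail, hence the unchecked unwrap below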
+
self.counts
+
.insert(
+
nsid,
+
unsafe { rkyv::to_bytes::<Error>(&counts).unwrap_unchecked() }.as_slice(),
+
)
+
.map_err(AppError::from)
+
}
+
+
pub fn get_count(&self, nsid: &str) -> AppResult<NsidCounts> {
+
let Some(raw) = self.counts.get(nsid)? else {
+
return Ok(NsidCounts::default());
+
};
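+
// SAFETY: these bytes were written by insert_count above, so skipping rkyv validation is sound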
+
Ok(unsafe { rkyv::from_bytes_unchecked::<_, Error>(&raw).unwrap_unchecked() })
+
}
+
+
pub fn get_counts(&self) -> impl Iterator<Item = AppResult<(SmolStr, NsidCounts)>> {
+
self.counts.iter().map(|res| {
+
res.map_err(AppError::from).map(|(key, val)| {
+
(
+
SmolStr::new(unsafe { str::from_utf8_unchecked(&key) }),
+
unsafe { rkyv::from_bytes_unchecked::<_, Error>(&val).unwrap_unchecked() },
+
)
+
})
+
})
+
}
+
+
pub fn get_nsids(&self) -> impl Iterator<Item = impl Deref<Target = str> + 'static> {
+
self.inner
+
.list_partitions()
+
.into_iter()
+
.filter(|k| k.deref() != "_counts")
+
}
+
+
pub fn get_hits_debug(&self, nsid: &str) -> BoxedIter<AppResult<(Slice, Slice)>> {
+
self.maybe_run_in_nsid_tree(nsid, |handle| -> BoxedIter<AppResult<(Slice, Slice)>> {
+
Box::new(
+
handle
+
.tree
+
.iter()
+
.rev()
+
.map(|res| res.map_err(AppError::from)),
+
)
+
})
+
.unwrap_or_else(|| Box::new(std::iter::empty()))
+
}
+
+
pub fn get_hits(
+
&self,
+
nsid: &str,
+
range: impl RangeBounds<u64> + std::fmt::Debug,
+
) -> BoxedIter<AppResult<Item>> {
+
let start = range
+
.start_bound()
+
.cloned()
+
.map(|t| unsafe { t.to_variable_vec().unwrap_unchecked() });
+
let end = range
+
.end_bound()
+
.cloned()
+
.map(|t| unsafe { t.to_variable_vec().unwrap_unchecked() });
+
let limit = match range.end_bound().cloned() {
+
Bound::Included(end) => end,
+
Bound::Excluded(end) => end.saturating_sub(1),
+
Bound::Unbounded => u64::MAX,
+
};
+
+
self.maybe_run_in_nsid_tree(nsid, move |handle| -> BoxedIter<AppResult<Item>> {
+
let map_block = move |(key, val)| {
+
let mut key_reader = Cursor::new(key);
+
let start_timestamp = key_reader.read_varint::<u64>()?;
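+
// a block keyed inside the range can still spill past its end, so trim trailing items here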
+
let items =
+
ItemDecoder::new(Cursor::new(val), start_timestamp)?.take_while(move |item| {
+
item.as_ref().map_or(true, |item| item.timestamp <= limit)
+
});
+
Ok(items)
+
};
+
+
Box::new(
+
handle
+
.tree
+
.range(TimestampRange { start, end })
+
.map(move |res| res.map_err(AppError::from).and_then(map_block))
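+
// the first flatten iterates only the Ok side, silently skipping blocks that failed to decode;
+
// the second yields the per-item results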
+
.flatten()
+
.flatten(),
+
)
+
})
+
.unwrap_or_else(|| Box::new(std::iter::empty()))
+
}
+
+
pub fn tracking_since(&self) -> AppResult<u64> {
+
// HACK: we should actually store when we started tracking but im lazy
+
// should be accurate enough
+
self.maybe_run_in_nsid_tree("app.bsky.feed.like", |handle| {
+
let Some((timestamps_raw, _)) = handle.tree.first_key_value()? else {
+
return Ok(0);
+
};
+
let mut timestamp_reader = Cursor::new(timestamps_raw);
+
timestamp_reader
+
.read_varint::<u64>()
+
.map_err(AppError::from)
+
})
+
.unwrap_or(Ok(0))
+
}
+
}
+
+
type TimestampRepr = Vec<u8>;
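+
// ordered_varint encodings compare lexicographically in numeric order, so encoded bounds drive the range scan directly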
+
+
struct TimestampRange {
+
start: Bound<TimestampRepr>,
+
end: Bound<TimestampRepr>,
+
}
+
+
impl RangeBounds<TimestampRepr> for TimestampRange {
+
#[inline(always)]
+
fn start_bound(&self) -> Bound<&TimestampRepr> {
+
self.start.as_ref()
+
}
+
+
#[inline(always)]
+
fn end_bound(&self) -> Bound<&TimestampRepr> {
+
self.end.as_ref()
+
}
+
}
+
+
type TimestampReprOld = [u8; 8];
+
+
struct TimestampRangeOld {
+
start: Bound<TimestampReprOld>,
+
end: Bound<TimestampReprOld>,
+
}
+
+
impl RangeBounds<TimestampReprOld> for TimestampRangeOld {
+
#[inline(always)]
+
fn start_bound(&self) -> Bound<&TimestampReprOld> {
+
self.start.as_ref()
+
}
+
+
#[inline(always)]
+
fn end_bound(&self) -> Bound<&TimestampReprOld> {
+
self.end.as_ref()
+
}
+
}
+11 -21
server/src/jetstream.rs
···
pub struct JetstreamClient {
stream: Option<WebSocketStream<MaybeTlsStream<TcpStream>>>,
tls_connector: tokio_websockets::Connector,
-
urls: Vec<SmolStr>,
+
url: SmolStr,
}
impl JetstreamClient {
-
pub fn new(urls: impl IntoIterator<Item = impl Into<SmolStr>>) -> AppResult<Self> {
+
pub fn new(url: &str) -> AppResult<Self> {
Ok(Self {
stream: None,
tls_connector: tokio_websockets::Connector::new()?,
-
urls: urls.into_iter().map(Into::into).collect(),
+
url: SmolStr::new(url),
})
}
pub async fn connect(&mut self) -> AppResult<()> {
-
for uri in &self.urls {
-
let conn_result = ClientBuilder::new()
-
.connector(&self.tls_connector)
-
.uri(uri)?
-
.connect()
-
.await;
-
match conn_result {
-
Ok((stream, _)) => {
-
self.stream = Some(stream);
-
tracing::info!("connected to jetstream {}", uri);
-
return Ok(());
-
}
-
Err(err) => {
-
tracing::error!("failed to connect to jetstream {uri}: {err}");
-
}
-
};
-
}
-
Err(anyhow!("failed to connect to any jetstream server").into())
+
let (stream, _) = ClientBuilder::new()
+
.connector(&self.tls_connector)
+
.uri(&self.url)?
+
.connect()
+
.await?;
+
self.stream = Some(stream);
+
tracing::info!("connected to jetstream ({})", self.url);
+
Ok(())
}
// automatically retries connection, only returning error if it fails many times
+21 -48
server/src/main.rs
···
-
use std::{ops::Deref, time::Duration, u64, usize};
+
use std::{ops::Deref, time::Duration, u64};
use itertools::Itertools;
use rclite::Arc;
···
mod api;
mod db;
+
mod db_old;
mod error;
mod jetstream;
mod utils;
···
+
#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
+
#[cfg(target_env = "msvc")]
+
#[global_allocator]
+
static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
+
#[tokio::main]
async fn main() {
tracing_subscriber::fmt::fmt()
···
debug();
return;
}
-
Some("print") => {
-
print_all();
-
return;
-
}
Some(x) => {
tracing::error!("unknown command: {}", x);
return;
···
.install_default()
.expect("cant install rustls crypto provider");
-
let urls = [
-
"wss://jetstream1.us-west.bsky.network/subscribe",
-
"wss://jetstream2.us-west.bsky.network/subscribe",
-
"wss://jetstream2.fr.hose.cam/subscribe",
-
"wss://jetstream.fire.hose.cam/subscribe",
-
];
-
let mut jetstream = match JetstreamClient::new(urls) {
-
Ok(client) => client,
-
Err(err) => {
-
tracing::error!("can't create jetstream client: {err}");
-
return;
-
}
-
};
+
let mut jetstream =
+
match JetstreamClient::new("wss://jetstream2.us-west.bsky.network/subscribe") {
+
Ok(client) => client,
+
Err(err) => {
+
tracing::error!("can't create jetstream client: {err}");
+
return;
+
}
+
};
let (event_tx, mut event_rx) = tokio::sync::mpsc::channel(1000);
let consume_events = tokio::spawn({
···
move || {
let mut buffer = Vec::new();
loop {
-
let read = event_rx.blocking_recv_many(&mut buffer, 500);
+
let read = event_rx.blocking_recv_many(&mut buffer, 100);
if let Err(err) = db.ingest_events(buffer.drain(..)) {
tracing::error!("failed to ingest events: {}", err);
}
···
if db.is_shutting_down() {
return;
}
-
let end = get_time();
+
let end = get_time() - compact_period / 2;
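+
// stay half a period behind "now" so the window being compacted is likely settled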
let start = end - compact_period;
let range = start.as_secs()..end.as_secs();
tracing::info!(
···
db.sync(true).expect("cant sync db");
}
-
fn print_all() {
-
let db = Db::new(DbConfig::default(), CancellationToken::new()).expect("couldnt create db");
-
let nsids = db.get_nsids().collect::<Vec<_>>();
-
let mut count = 0_usize;
-
for nsid in nsids {
-
println!("{}:", nsid.deref());
-
for hit in db.get_hits(&nsid, .., usize::MAX) {
-
let hit = hit.expect("aaa");
-
println!("{} {}", hit.timestamp, hit.deser().unwrap().deleted);
-
count += 1;
-
}
-
}
-
println!("total hits: {}", count);
-
}
-
fn debug() {
let db = Db::new(DbConfig::default(), CancellationToken::new()).expect("couldnt create db");
let info = db.info().expect("cant get db info");
···
fn migrate() {
let cancel_token = CancellationToken::new();
-
let from = Arc::new(
-
Db::new(
-
DbConfig::default().path(".fjall_data_from"),
-
cancel_token.child_token(),
-
)
-
.expect("couldnt create db"),
-
);
+
+
let from = Arc::new(db_old::Db::new(".fjall_data_from").expect("couldnt create db"));
+
let to = Arc::new(
Db::new(
DbConfig::default().path(".fjall_data_to").ks(|c| {
···
);
let nsids = from.get_nsids().collect::<Vec<_>>();
-
let _eps_thread = std::thread::spawn({
+
let eps_thread = std::thread::spawn({
let to = to.clone();
move || {
loop {
···
threads.push(std::thread::spawn(move || {
tracing::info!("{}: migrating...", nsid.deref());
let mut count = 0_u64;
-
for hits in from
-
.get_hits(&nsid, .., usize::MAX)
-
.chunks(100000)
-
.into_iter()
-
{
+
for hits in from.get_hits(&nsid, ..).chunks(100000).into_iter() {
to.ingest_events(hits.map(|hit| {
count += 1;
let hit = hit.expect("cant decode hit");
-66
server/src/utils.rs
···
use std::io::{self, Read, Write};
-
use std::ops::Deref;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
-
use arc_swap::RefCnt;
use byteview::ByteView;
use ordered_varint::Variable;
-
use rclite::Arc;
pub fn get_time() -> Duration {
std::time::SystemTime::now()
···
}
}
}
-
-
pub type ArcliteSwap<T> = arc_swap::ArcSwapAny<ArcRefCnt<T>>;
-
-
pub struct ArcRefCnt<T>(Arc<T>);
-
-
impl<T> ArcRefCnt<T> {
-
pub fn new(value: T) -> Self {
-
Self(Arc::new(value))
-
}
-
}
-
-
impl<T> Deref for ArcRefCnt<T> {
-
type Target = T;
-
-
fn deref(&self) -> &Self::Target {
-
&self.0
-
}
-
}
-
-
impl<T> Clone for ArcRefCnt<T> {
-
fn clone(&self) -> Self {
-
Self(self.0.clone())
-
}
-
}
-
-
// SAFETY: uhhhhhhhh copied the Arc impl from arc_swap xd
-
unsafe impl<T> RefCnt for ArcRefCnt<T> {
-
type Base = T;
-
-
fn into_ptr(me: Self) -> *mut Self::Base {
-
Arc::into_raw(me.0) as *mut T
-
}
-
-
fn as_ptr(me: &Self) -> *mut Self::Base {
-
// Slightly convoluted way to do this, but this avoids stacked borrows violations. The same
-
// intention as
-
//
-
// me as &T as *const T as *mut T
-
//
-
// We first create a "shallow copy" of me - one that doesn't really own its ref count
-
// (that's OK, me _does_ own it, so it can't be destroyed in the meantime).
-
// Then we can use into_raw (which preserves not having the ref count).
-
//
-
// We need to "revert" the changes we did. In current std implementation, the combination
-
// of from_raw and forget is no-op. But formally, into_raw shall be paired with from_raw
-
// and that read shall be paired with forget to properly "close the brackets". In future
-
// versions of STD, these may become something else that's not really no-op (unlikely, but
-
// possible), so we future-proof it a bit.
-
-
// SAFETY: &T cast to *const T will always be aligned, initialised and valid for reads
-
let ptr = Arc::into_raw(unsafe { std::ptr::read(&me.0) });
-
let ptr = ptr as *mut T;
-
-
// SAFETY: We got the pointer from into_raw just above
-
std::mem::forget(unsafe { Arc::from_raw(ptr) });
-
-
ptr
-
}
-
-
unsafe fn from_ptr(ptr: *const Self::Base) -> Self {
-
Self(unsafe { Arc::from_raw(ptr) })
-
}
-
}