atproto-car-generate-tree-cid.py
1#!/usr/bin/env python3
2
# no idea if this still works (though I guess it should?)
# maybe there is something here that somebody can learn from
# the 2023 goal was to learn how CIDs are generated; the 2025 goal is to add my first tangled string
6
7import hashlib, base64, cbor2
8from base64 import b32encode
9from dataclasses import dataclass
10from typing import Any
11
# CBOR tag number that encode_cbor wraps around a Link's bytes when serializing.
DAG_CBOR_CID_TAG = 42
13
@dataclass
class Link:
    """Reference to another object by its binary CID.

    encode_cbor serializes an instance as a CBOR tag (DAG_CBOR_CID_TAG)
    wrapping ``cid`` instead of treating it as an ordinary value.
    """

    # Raw CID bytes exactly as they are placed inside the CBOR tag.
    cid: bytes
17
def encode_cbor(val: Any) -> bytes:
    """Serialize *val* to canonical CBOR, encoding Link objects as tagged CIDs.

    Args:
        val: Any value cbor2 can encode natively, optionally containing
            Link instances at any depth.

    Returns:
        The canonical CBOR encoding of *val*.

    Raises:
        TypeError: If *val* contains an object that is neither natively
            encodable by cbor2 nor a Link.
    """

    def dag_cbor_encoder(encoder, value):
        # cbor2 calls this hook for types it has no built-in encoder for.
        # The hook must write through ``encoder``; its return value is ignored.
        if isinstance(value, Link):
            encoder.encode(cbor2.CBORTag(DAG_CBOR_CID_TAG, value.cid))
            return
        # Without this, an unsupported value would be skipped silently and
        # the resulting bytes (and any hash of them) would be wrong.
        raise TypeError(
            f'cannot encode object of type {type(value).__name__} as CBOR'
        )

    return cbor2.dumps(val, default=dag_cbor_encoder, canonical=True)
24
def hash_obj(val: bytes) -> bytes:
    """Return the raw 32-byte SHA-256 digest of *val*."""
    return hashlib.sha256(val).digest()
27
def encode_b32(digest: bytes) -> str:
    """Render *digest* as a lowercase, unpadded base32 CID string.

    The four header bytes 01 71 12 20 are prepended to *digest*, the result
    is base32-encoded, lowercased and stripped of '=' padding, and a
    leading 'b' is added.

    Args:
        digest: Hash digest bytes (the script passes a SHA-256 digest).

    Returns:
        The textual CID, e.g. a string starting with 'bafyrei' for a
        32-byte digest.
    """
    # Per the multiformats tables: 0x01 = CIDv1, 0x71 = dag-cbor,
    # 0x12 = sha2-256, 0x20 = digest length of 32 bytes.
    cid_header = b'\x01\x71\x12\x20'
    encoded = b32encode(cid_header + digest).decode()
    return 'b' + encoded.lower().rstrip('=')
30
def encode_cid(val: dict) -> Link:
    """Hash the canonical CBOR form of *val* and wrap the result in a Link.

    Args:
        val: The record to address; it is serialized with encode_cbor.

    Returns:
        A Link whose ``cid`` is the byte 0x00 followed by the header
        01 71 12 20 and the SHA-256 digest of the encoded record.
    """
    digest = hash_obj(encode_cbor(val))
    # The leading 0x00 sits in front of the same 4-byte header used by
    # encode_b32; presumably the identity-multibase prefix that DAG-CBOR
    # requires inside tag 42. Confirm against the DAG-CBOR spec.
    return Link(cid=b'\x00\x01\x71\x12\x20' + digest)
34
# Two tree entries, each pointing (via 'v') at the CID of a record body.
# NOTE(review): the 'k'/'p'/'v'/'t' and 'l'/'e' keys look like the atproto
# MST node layout (key suffix, shared-prefix length, value link, subtree
# link / left link, entries). Confirm against the atproto repository spec.
record1 = {
    'k': b'app.bsky.feed.post/3keyfhciqrr2j',
    'p': 0,
    'v': encode_cid({
        'text': 'hello world',
        '$type': 'app.bsky.feed.post',
        'langs': ['en'],
        'createdAt': '2023-11-25T04:53:44.772Z',
    }),
    't': None,
}

record2 = {
    # 'p' is 9 == len('app.bsky.'), so 'k' presumably holds only the part
    # of the key that differs from record1's key.
    'k': b'graph.follow/3keyfhsnwm72p',
    'p': 9,
    'v': encode_cid({
        '$type': 'app.bsky.graph.follow',
        'subject': 'did:plc:4nsduwlpivpuur4mqkbfvm6a',
        'createdAt': '2023-11-25T04:54:01.723Z',
    }),
    't': None,
}

tree = {
    'l': None,
    'e': [record1, record2],
}

# Serialize the node, hash it, and compare the textual CID to the known value.
tree_cbor = encode_cbor(tree)
tree_digest = hash_obj(tree_cbor)

assert encode_b32(tree_digest) == 'bafyreif5wigbpxdo6kli4lltcrv47wndp23sknhulnoepe6vhze432oqoe'