···
1
+
/// Checks `s` against the atproto DID identifier syntax and, when it passes,
/// returns an owned copy of it.
///
/// See <https://atproto.com/specs/did#at-protocol-did-identifier-syntax>.
///
/// This parser is intentionally lax: it should accept all valid DIDs, and may
/// accept some invalid DIDs. In particular, percent-encoding inside the
/// identifier is not validated here.
///
/// Returns `None` when any of the checks below fails. On success the input is
/// returned unchanged (`Some(s.to_string())`); no normalization is applied.
pub fn parse_did(s: &str) -> Option<String> {
    // atproto's initial hard limit on the length of a DID: 2 KB.
    const MAX_DID_LEN: usize = 2 * 1024;

    // The checks below follow the rules in the order the spec lists them,
    // without much regard for efficiency.

    // The entire URI is made up of a subset of ASCII, containing letters (A-Z, a-z),
    // digits (0-9), period, underscore, colon, percent sign, or hyphen (._:%-)
    if !s
        .chars()
        .all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | ':' | '%' | '-'))
    {
        return None;
    }

    // The URI is case-sensitive
    // -> (nothing to check)

    // The URI starts with lowercase `did:`
    let unprefixed = s.strip_prefix("did:")?;

    // The method segment is one or more lowercase letters (a-z), followed by ':'
    let (method, identifier) = unprefixed.split_once(':')?;
    // `all` is vacuously true for an empty string, so "one or more" needs its
    // own check; without it `did::abc` would be accepted.
    if method.is_empty() || !method.chars().all(|c| c.is_ascii_lowercase()) {
        return None;
    }

    // The remainder of the URI (the identifier) may contain any of the above-allowed
    // ASCII characters, except for percent-sign (%)
    // -> not checked here: deciding this requires knowing the encoding context

    // The URI (and thus the remaining identifier) may not end in ':'.
    if identifier.ends_with(':') {
        return None;
    }

    // Percent-sign (%) is used for "percent encoding" in the identifier section, and
    // must always be followed by two hex characters
    // -> not checked here: again depends on the encoding context

    // Query (?) and fragment (#) sections are allowed in DID URIs, but not in DID
    // identifiers.
    // -> disallowed here: '?' and '#' are outside the character set checked at the
    // -> top. The URI decoder should already have split them out first.

    // DID identifiers do not generally have a maximum length restriction, but in the
    // context of atproto, there is an initial hard limit of 2 KB.
    // -> we're in atproto, so enforce it. (it would be sensible to do this first,
    // -> but the checks follow doc order)
    if s.len() > MAX_DID_LEN {
        return None;
    }

    // -> it's not actually written in the spec, but by example in the spec, the
    // -> identifier cannot be empty
    if identifier.is_empty() {
        return None;
    }

    // The only normalization we might want would be percent-decoding, but that is
    // left to the URI decoder.
    Some(s.to_string())
}
73
+
fn test_did_parse() {
74
+
for (case, expected, detail) in vec![
75
+
("", None, "empty str"),
76
+
(" ", None, "whitespace str"),
77
+
("z", None, "not a did"),
78
+
("did:plc", None, "no identifier separator colon"),
79
+
("did:plc:", None, "missing identifier"),
81
+
"did:web:bad-example.com",
82
+
Some("did:web:bad-example.com"),
86
+
"did:plc:hdhoaan3xa3jiuq4fg4mefid",
87
+
Some("did:plc:hdhoaan3xa3jiuq4fg4mefid"),
91
+
"DID:plc:hdhoaan3xa3jiuq4fg4mefid",
93
+
"'did:' prefix must be lowercase",
98
+
"unknown did methods are allowed",
100
+
("did:BAD:z", None, "non-lowercase methods are not allowed"),
101
+
("did:bad:z$z", None, "invalid chars are not allowed"),
104
+
Some("did:ok:z:z"),
105
+
"colons are allowed in identifier",
107
+
("did:bad:z:", None, "colons not are allowed at the end"),
108
+
("did:bad:z?q=y", None, "queries are not allowed in atproto"),
109
+
("did:bad:z#a", None, "anchors are not allowed in atproto"),
111
+
assert_eq!(parse_did(case), expected.map(|s| s.to_string()), "{detail}");
116
+
fn test_doc_exmples_atproto() {
117
+
// https://atproto.com/specs/did#at-protocol-did-identifier-syntax
118
+
for case in vec!["did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz"] {
119
+
assert!(parse_did(case).is_some(), "should pass: {case}")
124
+
fn test_doc_exmples_lexicon() {
125
+
// https://atproto.com/specs/did#at-protocol-did-identifier-syntax
127
+
"did:method:val:two",
129
+
"did:method::::val",
130
+
"did:method:-:_:.",
131
+
"did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
133
+
assert!(parse_did(case).is_some(), "should pass: {case}")
138
+
fn test_doc_exmples_invalid() {
139
+
// https://atproto.com/specs/did#at-protocol-did-identifier-syntax
145
+
"did:method:val/two",
146
+
"did:method:val?two",
147
+
"did:method:val#two",
149
+
assert!(parse_did(case).is_none(), "should fail: {case}")