// forked from microcosm.blue/microcosm-rs
// Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
/// Validates an at-protocol DID and returns an owned, unmodified copy of it.
///
/// See <https://atproto.com/specs/did#at-protocol-did-identifier-syntax>.
///
/// This parser is intentionally lax: it should accept all valid DIDs, and may
/// accept some invalid DIDs. In particular, percent-encoding is not validated
/// here (a `%` is accepted anywhere and need not be followed by two hex
/// digits); the original author deferred that because it depends on the
/// encoding context of the caller.
///
/// Returns `None` when `s`:
/// - is longer than 2 KB,
/// - contains a character outside `A-Z a-z 0-9 . _ : % -` (this also rejects
///   query `?` and fragment `#` parts, which atproto does not allow),
/// - does not start with lowercase `did:`,
/// - has an empty method segment or one containing anything other than `a-z`,
/// - has an empty identifier, or one that ends in `:`.
///
/// No normalization is performed; percent-decoding is left to the URI decoder.
pub fn parse_did(s: &str) -> Option<String> {
    // DID identifiers do not generally have a maximum length restriction, but
    // in the context of atproto there is an initial hard limit of 2 KB.
    // Checked first so oversized input is rejected without scanning it.
    const MAX_DID_LEN: usize = 2 * 1024;
    if s.len() > MAX_DID_LEN {
        return None;
    }

    // The entire URI is made up of a subset of ASCII, containing letters
    // (A-Z, a-z), digits (0-9), period, underscore, colon, percent sign, or
    // hyphen (._:%-). Any byte of a multi-byte UTF-8 character is >= 0x80 and
    // therefore fails this test, so checking bytes is sufficient.
    let is_allowed =
        |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b':' | b'%' | b'-');
    if !s.bytes().all(is_allowed) {
        return None;
    }

    // The URI is case-sensitive and starts with lowercase `did:`.
    let unprefixed = s.strip_prefix("did:")?;

    // The method segment is one or more lowercase letters (a-z), followed by
    // ':'. `all` is vacuously true for an empty string, so the "one or more"
    // part needs its own explicit check.
    let (method, identifier) = unprefixed.split_once(':')?;
    if method.is_empty() || !method.bytes().all(|b| b.is_ascii_lowercase()) {
        return None;
    }

    // The URI (and thus the identifier) may not end in ':'. It's not actually
    // written in the spec, but by example in the spec, the identifier cannot
    // be empty either.
    if identifier.is_empty() || identifier.ends_with(':') {
        return None;
    }

    // The only normalization we might want would be percent-decoding, but we
    // leave that to the uri decoder.
    Some(s.to_owned())
}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `parse_did` accepts every DID in `dids`.
    fn expect_all_accepted(dids: &[&str]) {
        dids.iter()
            .for_each(|case| assert!(parse_did(case).is_some(), "should pass: {case}"));
    }

    /// Asserts that `parse_did` rejects every DID in `dids`.
    fn expect_all_rejected(dids: &[&str]) {
        dids.iter()
            .for_each(|case| assert!(parse_did(case).is_none(), "should fail: {case}"));
    }

    /// Table-driven check: each row is (input, expected parse result, description).
    #[test]
    fn test_did_parse() {
        let table: &[(&str, Option<&str>, &str)] = &[
            // inputs that are not DIDs at all
            ("", None, "empty str"),
            (" ", None, "whitespace str"),
            ("z", None, "not a did"),
            // incomplete DIDs
            ("did:plc", None, "no identifier separator colon"),
            ("did:plc:", None, "missing identifier"),
            // well-formed DIDs come back unchanged
            (
                "did:web:bad-example.com",
                Some("did:web:bad-example.com"),
                "web did",
            ),
            (
                "did:plc:hdhoaan3xa3jiuq4fg4mefid",
                Some("did:plc:hdhoaan3xa3jiuq4fg4mefid"),
                "plc did",
            ),
            // prefix and method casing
            (
                "DID:plc:hdhoaan3xa3jiuq4fg4mefid",
                None,
                "'did:' prefix must be lowercase",
            ),
            (
                "did:ok:z",
                Some("did:ok:z"),
                "unknown did methods are allowed",
            ),
            ("did:BAD:z", None, "non-lowercase methods are not allowed"),
            // identifier contents
            ("did:bad:z$z", None, "invalid chars are not allowed"),
            (
                "did:ok:z:z",
                Some("did:ok:z:z"),
                "colons are allowed in identifier",
            ),
            ("did:bad:z:", None, "colons not are allowed at the end"),
            ("did:bad:z?q=y", None, "queries are not allowed in atproto"),
            ("did:bad:z#a", None, "anchors are not allowed in atproto"),
        ];

        for &(input, want, detail) in table {
            let got = parse_did(input);
            assert_eq!(got, want.map(String::from), "{detail}");
        }
    }

    /// Valid examples from the atproto DID spec.
    #[test]
    fn test_doc_exmples_atproto() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        expect_all_accepted(&["did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz"]);
    }

    /// Syntactically valid examples (not necessarily supported methods) from the spec.
    #[test]
    fn test_doc_exmples_lexicon() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        expect_all_accepted(&[
            "did:method:val:two",
            "did:m:v",
            "did:method::::val",
            "did:method:-:_:.",
            "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
        ]);
    }

    /// Invalid examples from the spec; all of them must be rejected.
    #[test]
    fn test_doc_exmples_invalid() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        expect_all_rejected(&[
            "did:METHOD:val",
            "did:m123:val",
            "DID:method:val",
            "did:method:",
            "did:method:val/two",
            "did:method:val?two",
            "did:method:val#two",
        ]);
    }
}