Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
at main 5.4 kB view raw
/// Maximum accepted DID length in bytes: the atproto spec's initial hard limit of 2 KB.
const MAX_DID_LEN: usize = 2 * 1024;

/// Checks `s` against the atproto DID syntax rules and returns an owned copy
/// of it when it passes, or `None` when any rule is violated.
///
/// see https://atproto.com/specs/did#at-protocol-did-identifier-syntax
///
/// this parser is intentionally lax: it should accept all valid DIDs, and
/// may accept some invalid DIDs. In particular, percent-encoding inside the
/// identifier is not validated here.
///
/// No normalization is applied: on success the returned string is
/// byte-for-byte equal to the input.
pub fn parse_did(s: &str) -> Option<String> {
    // DID identifiers do not generally have a maximum length restriction, but in the
    // context of atproto, there is an initial hard limit of 2 KB.
    // -> checked first so oversized input is rejected before any scanning.
    if s.len() > MAX_DID_LEN {
        return None;
    }

    // The entire URI is made up of a subset of ASCII, containing letters (A-Z, a-z),
    // digits (0-9), period, underscore, colon, percent sign, or hyphen (._:%-)
    //
    // Query (?) and fragment (#) sections are allowed in DID URIs, but not in DID
    // identifiers, and are not allowed at all in the context of atproto.
    // -> '?' and '#' are outside the allowed set, so they are rejected here too.
    let all_allowed = s
        .bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b':' | b'%' | b'-'));
    if !all_allowed {
        return None;
    }

    // The URI is case-sensitive
    // -> (nothing to check)

    // The URI starts with lowercase `did:`
    let unprefixed = s.strip_prefix("did:")?;

    // The method segment is one or more lowercase letters (a-z), followed by :
    // -> `all` is vacuously true for an empty string, so "one or more" needs
    // -> its own explicit check.
    let (method, identifier) = unprefixed.split_once(':')?;
    if method.is_empty() || !method.bytes().all(|b| b.is_ascii_lowercase()) {
        return None;
    }

    // The remainder of the URI (the identifier) may contain any of the above-allowed
    // ASCII characters, except for percent-sign (%)
    // -> not enforced: we'd need to know our encoding context for this
    //
    // Percent-sign (%) is used for "percent encoding" in the identifier section, and
    // must always be followed by two hex characters
    // -> not enforced: again, encoding context

    // The URI (and thus the remaining identifier) may not end in ':'.
    // -> it's not actually written in the spec, but by example in the spec, the
    // -> identifier cannot be empty either
    if identifier.is_empty() || identifier.ends_with(':') {
        return None;
    }

    // the only normalization we might want would be percent-decoding, but we
    // probably leave that to the uri decoder
    Some(s.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_did_parse() {
        for (case, expected, detail) in [
            ("", None, "empty str"),
            (" ", None, "whitespace str"),
            ("z", None, "not a did"),
            ("did:plc", None, "no identifier separator colon"),
            ("did:plc:", None, "missing identifier"),
            (
                "did:web:bad-example.com",
                Some("did:web:bad-example.com"),
                "web did",
            ),
            (
                "did:plc:hdhoaan3xa3jiuq4fg4mefid",
                Some("did:plc:hdhoaan3xa3jiuq4fg4mefid"),
                "plc did",
            ),
            (
                "DID:plc:hdhoaan3xa3jiuq4fg4mefid",
                None,
                "'did:' prefix must be lowercase",
            ),
            (
                "did:ok:z",
                Some("did:ok:z"),
                "unknown did methods are allowed",
            ),
            ("did:BAD:z", None, "non-lowercase methods are not allowed"),
            ("did::z", None, "empty methods are not allowed"),
            ("did:bad:z$z", None, "invalid chars are not allowed"),
            (
                "did:ok:z:z",
                Some("did:ok:z:z"),
                "colons are allowed in identifier",
            ),
            ("did:bad:z:", None, "colons not are allowed at the end"),
            ("did:bad:z?q=y", None, "queries are not allowed in atproto"),
            ("did:bad:z#a", None, "anchors are not allowed in atproto"),
        ] {
            assert_eq!(parse_did(case), expected.map(|s| s.to_string()), "{detail}");
        }
    }

    #[test]
    fn test_did_length_limit() {
        // "did:plc:" is 8 bytes; pad the identifier up to and just past the limit.
        let at_limit = format!("did:plc:{}", "a".repeat(MAX_DID_LEN - 8));
        assert_eq!(at_limit.len(), MAX_DID_LEN);
        assert!(parse_did(&at_limit).is_some(), "exactly 2 KB should pass");

        let over_limit = format!("{at_limit}a");
        assert!(parse_did(&over_limit).is_none(), "over 2 KB should fail");
    }

    #[test]
    fn test_doc_examples_atproto() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in ["did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz"] {
            assert!(parse_did(case).is_some(), "should pass: {case}")
        }
    }

    #[test]
    fn test_doc_examples_lexicon() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in [
            "did:method:val:two",
            "did:m:v",
            "did:method::::val",
            "did:method:-:_:.",
            "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
        ] {
            assert!(parse_did(case).is_some(), "should pass: {case}")
        }
    }

    #[test]
    fn test_doc_examples_invalid() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in [
            "did:METHOD:val",
            "did:m123:val",
            "DID:method:val",
            "did:method:",
            "did:method:val/two",
            "did:method:val?two",
            "did:method:val#two",
        ] {
            assert!(parse_did(case).is_none(), "should fail: {case}")
        }
    }
}