commit 1a7278a239a18bb2e6b280879f545def3f6a88d9 · nekomimi.pet/microcosm-rs

+2 -6

src/at_uri.rs

···

       17
       17
        
           if !s.is_ascii() {

     

       18
       18
        
               return None;

     

       19
       19
        
           }

     

       20
       20
       -
           // // A-Za-z0-9 . - _ ~

     

       21
       21
       -
           // if !s.chars().all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_' | '~')) {

     

       22
       22
       -
           //     return None

     

       23
       23
       -
           // }

     

       24
       20
        
       

     

       25
       21
        
           // Maximum overall length is 8 kilobytes (which may be shortened in the future)

     

       26
       22
        
           if s.len() > (8 * 2_usize.pow(10)) {

     
···

       59
       55
        
       

     

       60
       56
        
           // The URI scheme is `at`, and an authority part preceded with double slashes is always

     

       61
       57
        
           // required, so the URI always starts at://

     

       62
       62
       -
           // -> the spec doesn't explicitly say, but it seems like uri schemes are case-insensitive

     

       58
       58
       +
           // -> the spec doesn't explicitly say, but uri schemes can be case-insensitive?

     

       63
       59
        
           let (proto, rest) = s.split_at_checked(5)?;

     

       64
       60
        
           if !proto.eq_ignore_ascii_case("at://") {

     

       65
       61
        
               return None;

     
···

       225
       221
        
                   (

     

       226
       222
        
                       "at://bad-example.com/a/../b",

     

       227
       223
        
                       Some("at://bad-example.com/b"),

     

       228
       228
       -
                       "paths have traversals resolved (oof)",

     

       224
       224
       +
                       "paths have traversals resolved (oof)", // reminder to self: we are normalizing, not sanitizing

     

       229
       225
        
                   ),

     

       230
       226
        
                   (

     

       231
       227
        
                       "at://bad-example.com/../",

+152

src/did.rs

···

       1
       1
       +
       /// Parse an atproto DID identifier.
       ///
       /// see https://atproto.com/specs/did#at-protocol-did-identifier-syntax
       /// this parser is intentionally lax: it should accept all valid DIDs, and
       /// may accept some invalid DIDs.
       ///
       /// Returns the input unchanged, as an owned `String`, when every check below
       /// passes, and `None` as soon as one of them fails. No normalization is applied.
       ///
       /// at the moment this implementation might also be quite bad and incomplete
       pub fn parse_did(s: &str) -> Option<String> {
           // for now, just working through the rules laid out in the docs in order,
           // without much regard for efficiency for now.

           // The entire URI is made up of a subset of ASCII, containing letters (A-Z, a-z),
           // digits (0-9), period, underscore, colon, percent sign, or hyphen (._:%-)
           if !s
               .chars()
               .all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | ':' | '%' | '-'))
           {
               return None;
           }

           // The URI is case-sensitive
           // -> (nothing to check)

           // The URI starts with lowercase `did:`
           let unprefixed = s.strip_prefix("did:")?;

           // The method segment is one or more lowercase letters (a-z), followed by :
           // -> `all` is vacuously true on an empty iterator, so the "one or more" part
           // ->   has to be checked explicitly or `did::val` would slip through.
           let (method, identifier) = unprefixed.split_once(':')?;
           if method.is_empty() || !method.chars().all(|c| c.is_ascii_lowercase()) {
               return None;
           }

           // The remainder of the URI (the identifier) may contain any of the above-allowed
           // ASCII characters, except for percent-sign (%)
           // -> ok, ugh, gotta know our encoding context for this

           // The URI (and thus the remaining identifier) may not end in ':'.
           if identifier.ends_with(':') {
               return None;
           }

           // Percent-sign (%) is used for "percent encoding" in the identifier section, and
           // must always be followed by two hex characters
           // -> again encoding context (bleh)

           // Query (?) and fragment (#) sections are allowed in DID URIs, but not in DID
           // identifiers. In the context of atproto, the query and fragment parts are not
           // allowed.
           // -> disallow here -- the uri decoder should already split them out first.
           // ->   (`?` and `#` are already rejected by the character check above)

           // DID identifiers do not generally have a maximum length restriction, but in the
           // context of atproto, there is an initial hard limit of 2 KB.
           // -> we're in atproto, so sure, let's enforce it. (would be sensible to do this
           // ->   first but we're following doc order)
           if s.len() > (2 * 2_usize.pow(10)) {
               return None;
           }

           // -> it's not actually written in the spec, but by example in the spec, the
           // -> identifier cannot be empty
           if identifier.is_empty() {
               return None;
           }

           // the only normalization we might want would be percent-decoding, but we
           // probably leave that to the uri decoder
           Some(s.to_string())
       }

     

       67
       67
       +
       

     

       68
       68
       +
       #[cfg(test)]
       mod tests {
           use super::*;

           /// Table-driven checks: each row is (input, expected result, description).
           #[test]
           fn test_did_parse() {
               for (case, expected, detail) in [
                   ("", None, "empty str"),
                   (" ", None, "whitespace str"),
                   ("z", None, "not a did"),
                   ("did:plc", None, "no identifier separator colon"),
                   ("did:plc:", None, "missing identifier"),
                   (
                       "did:web:bad-example.com",
                       Some("did:web:bad-example.com"),
                       "web did",
                   ),
                   (
                       "did:plc:hdhoaan3xa3jiuq4fg4mefid",
                       Some("did:plc:hdhoaan3xa3jiuq4fg4mefid"),
                       "plc did",
                   ),
                   (
                       "DID:plc:hdhoaan3xa3jiuq4fg4mefid",
                       None,
                       "'did:' prefix must be lowercase",
                   ),
                   (
                       "did:ok:z",
                       Some("did:ok:z"),
                       "unknown did methods are allowed",
                   ),
                   ("did:BAD:z", None, "non-lowercase methods are not allowed"),
                   ("did:bad:z$z", None, "invalid chars are not allowed"),
                   (
                       "did:ok:z:z",
                       Some("did:ok:z:z"),
                       "colons are allowed in identifier",
                   ),
                   ("did:bad:z:", None, "colons are not allowed at the end"),
                   ("did:bad:z?q=y", None, "queries are not allowed in atproto"),
                   ("did:bad:z#a", None, "anchors are not allowed in atproto"),
               ] {
                   assert_eq!(parse_did(case), expected.map(|s| s.to_string()), "{detail}");
               }
           }

           /// Inputs taken from the atproto DID spec page linked below; all must parse.
           #[test]
           fn test_doc_examples_atproto() {
               // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
               for case in ["did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz"] {
                   assert!(parse_did(case).is_some(), "should pass: {case}");
               }
           }

           /// Further inputs that must parse, including methods other than plc and web.
           #[test]
           fn test_doc_examples_lexicon() {
               // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
               for case in [
                   "did:method:val:two",
                   "did:m:v",
                   "did:method::::val",
                   "did:method:-:_:.",
                   "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
               ] {
                   assert!(parse_did(case).is_some(), "should pass: {case}");
               }
           }

           /// Inputs that must be rejected.
           #[test]
           fn test_doc_examples_invalid() {
               // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
               for case in [
                   "did:METHOD:val",
                   "did:m123:val",
                   "DID:method:val",
                   "did:method:",
                   "did:method:val/two",
                   "did:method:val?two",
                   "did:method:val#two",
               ] {
                   assert!(parse_did(case).is_none(), "should fail: {case}");
               }
           }
       }

+12 -8

src/lib.rs

···

       1
       1
        
       use fluent_uri::Uri;

     

       2
       2
        
       

     

       3
       3
        
       pub mod at_uri;

     

       4
       4
       +
       pub mod did;

     

       4
       5
        
       

     

       5
       6
        
       #[derive(Debug, PartialEq)]

     

       6
       7
        
       pub enum Link {

     

       7
       8
        
           AtUri(String),

     

       8
       9
        
           Uri(String),

     

       9
       9
       -
       }

     

       10
       10
       -
       

     

       11
       11
       -
       // normalizing is a bit opinionated but ehhh

     

       12
       12
       -
       pub fn parse_at_uri(s: &str) -> Option<String> {

     

       13
       13
       -
           at_uri::parse_at_uri(s)

     

       10
       10
       +
           Did(String),

     

       14
       11
        
       }

     

       15
       12
        
       

     

       16
       13
        
       // normalizing is a bit opinionated but eh

     
···

       19
       16
        
       }

     

       20
       17
        
       

     

       21
       18
        
       pub fn parse_any(s: &str) -> Option<Link> {

     

       22
       22
       -
           parse_at_uri(s)

     

       23
       23
       -
               .map(Link::AtUri)

     

       24
       24
       -
               .or_else(|| parse_uri(s).map(Link::Uri))

     

       19
       19
       +
           at_uri::parse_at_uri(s).map(Link::AtUri).or_else(|| {

     

       20
       20
       +
               did::parse_did(s)

     

       21
       21
       +
                   .map(Link::Did)

     

       22
       22
       +
                   .or_else(|| parse_uri(s).map(Link::Uri))

     

       23
       23
       +
           })

     

       25
       24
        
       }

     

       26
       25
        
       

     

       27
       26
        
       #[cfg(test)]

     
···

       60
       59
        
                       "at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26".into()

     

       61
       60
        
                   )),

     

       62
       61
        
               );

     

       62
       62
       +
       

     

       63
       63
       +
               assert_eq!(

     

       64
       64
       +
                   parse_any("did:plc:44ybard66vv44zksje25o7dz"),

     

       65
       65
       +
                   Some(Link::Did("did:plc:44ybard66vv44zksje25o7dz".into()))

     

       66
       66
       +
               )

     

       63
       67
        
           }

     

       64
       68
        
       }