Fix TID validation #4

merged
opened by danabra.mov targeting main

The TID format validator was rejecting valid TIDs such as `3m3zm7eurxk26`.

It seems that the syntax assumed in the file is wrong, so I amended it to follow https://atproto.com/specs/tid#tid-syntax. In particular, see the "reference regex" in the spec, which is /^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$/ and doesn't match the old code.

This is vibecoded so be cautious (though both Claude and ChatGPT seem happy with the result). I've also asked Claude to cross-check the new tests against atproto/packages/syntax/src/tid.ts.

Changed files
+24 -11
crates
slices-lexicon
src
validation
primitive
+24 -11
crates/slices-lexicon/src/validation/primitive/string.rs
···
/// Validates TID (Timestamp Identifier) format
///
-
/// TID format: 13-character base32-encoded timestamp + random bits
-
/// Uses Crockford base32 alphabet: 0123456789ABCDEFGHJKMNPQRSTVWXYZ (case-insensitive)
+
/// TID format: 13-character base32-sortable encoded timestamp + random bits
+
/// Uses ATProto base32-sortable alphabet: 234567abcdefghijklmnopqrstuvwxyz (lowercase only)
pub fn is_valid_tid(&self, value: &str) -> bool {
use regex::Regex;
···
return false;
}
-
// TID uses Crockford base32 (case-insensitive, excludes I, L, O, U)
-
let tid_regex = Regex::new(r"^[0-9A-HJKMNP-TV-Z]{13}$").unwrap();
-
let uppercase_value = value.to_uppercase();
+
// TID uses base32-sortable (s32) - lowercase only
+
// First character must be from limited set (ensures top bit is 0)
+
// Remaining 12 characters from full base32-sortable alphabet
+
let tid_regex = Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap();
-
tid_regex.is_match(&uppercase_value)
+
tid_regex.is_match(value)
}
/// Validates Record Key format
···
let validator = StringValidator;
-
// Valid TIDs (13 characters, Crockford base32)
-
assert!(validator.validate_data(&json!("3JZFKJT0000ZZ"), &schema, &ctx).is_ok());
-
assert!(validator.validate_data(&json!("3jzfkjt0000zz"), &schema, &ctx).is_ok()); // case insensitive
+
// Valid TIDs (base32-sortable, 13 chars, lowercase)
+
assert!(validator.validate_data(&json!("3m3zm7eurxk26"), &schema, &ctx).is_ok());
+
assert!(validator.validate_data(&json!("2222222222222"), &schema, &ctx).is_ok()); // minimum TID
+
assert!(validator.validate_data(&json!("a222222222222"), &schema, &ctx).is_ok()); // leading 'a' (lower bound)
+
assert!(validator.validate_data(&json!("j234567abcdef"), &schema, &ctx).is_ok()); // leading 'j' (upper bound)
+
-
// Invalid TIDs
+
// Invalid TIDs - uppercase not allowed (charset is lowercase only)
+
assert!(validator.validate_data(&json!("3m3zM7eurxk26"), &schema, &ctx).is_err()); // mixed case
+
+
// Invalid TIDs - wrong length
assert!(validator.validate_data(&json!("too-short"), &schema, &ctx).is_err());
assert!(validator.validate_data(&json!("too-long-string"), &schema, &ctx).is_err());
+
+
// Invalid TIDs - invalid characters (hyphen/punct rejected; digits 0,1,8,9 not allowed)
assert!(validator.validate_data(&json!("invalid-chars!"), &schema, &ctx).is_err());
-
assert!(validator.validate_data(&json!("invalid-ILOU0"), &schema, &ctx).is_err()); // invalid chars (I, L, O, U)
+
assert!(validator.validate_data(&json!("xyz1234567890"), &schema, &ctx).is_err()); // has 0,1,8,9
+
+
// Invalid TIDs - first character must be one of 234567abcdefghij
+
assert!(validator.validate_data(&json!("k222222222222"), &schema, &ctx).is_err()); // leading 'k' forbidden
+
assert!(validator.validate_data(&json!("z234567abcdef"), &schema, &ctx).is_err()); // leading 'z' forbidden
#[test]