(* OCaml library for Crockford's Base32. *)
(** Payload describing a rejected length: [length] is the offending value
    and [message] explains why it was rejected. *)
type invalid_length = {
  length : int;
  message : string;
}

(** Payload describing a rejected character: [char] is the offending
    character and [message] explains why it was rejected. *)
type invalid_character = {
  char : char;
  message : string;
}

(** Payload describing a malformed checksum: [checksum] is the text that
    could not be used as a checksum and [message] explains why. *)
type invalid_checksum = {
  checksum : string;
  message : string;
}

(** Payload describing a checksum that parsed correctly but does not match:
    [expected] is the value computed from the decoded number, [got] is the
    value found in the input, and [identifier] is the input string. *)
type checksum_mismatch = {
  expected : int64;
  got : int64;
  identifier : string;
}
5
(** Reasons an operation in this module can fail. Each constructor carries
    a record with the details of the failure. *)
type decode_error =
  | Invalid_length of invalid_length  (** A length was rejected. *)
  | Invalid_character of invalid_character
      (** A character is not part of the base32 alphabet. *)
  | Invalid_checksum of invalid_checksum
      (** The checksum part is missing or malformed. *)
  | Checksum_mismatch of checksum_mismatch
      (** The checksum in the input differs from the computed one. *)
11
(** Exception carrying a {!decode_error}; raised by the functions of this
    module to report a failure. *)
exception Decode_error of decode_error
13
(** [pp_invalid_length fmt err] prints [err] on [fmt] as
    [Invalid_length: length=<length>, <message>]. *)
let pp_invalid_length fmt (err : invalid_length) =
  Format.fprintf fmt "Invalid_length: length=%d, %s" err.length err.message
16
(** [pp_invalid_character fmt err] prints [err] on [fmt] as
    [Invalid_character: char='<char>', <message>]. *)
let pp_invalid_character fmt (err : invalid_character) =
  Format.fprintf fmt "Invalid_character: char='%c', %s" err.char err.message
19
(** [pp_invalid_checksum fmt err] prints [err] on [fmt] as
    [Invalid_checksum: checksum=<checksum>, <message>]. *)
let pp_invalid_checksum fmt (err : invalid_checksum) =
  Format.fprintf fmt "Invalid_checksum: checksum=%s, %s"
    err.checksum err.message
22
(** [pp_checksum_mismatch fmt err] prints [err] on [fmt] as
    [Checksum_mismatch: expected=<expected>, got=<got>, identifier=<id>]. *)
let pp_checksum_mismatch fmt (err : checksum_mismatch) =
  Format.fprintf fmt "Checksum_mismatch: expected=%Ld, got=%Ld, identifier=%s"
    err.expected err.got err.identifier
26
(** [pp_decode_error fmt err] prints [err] on [fmt] by dispatching to the
    printer that matches its constructor. *)
let pp_decode_error fmt err =
  match err with
  | Invalid_length details -> pp_invalid_length fmt details
  | Invalid_character details -> pp_invalid_character fmt details
  | Invalid_checksum details -> pp_invalid_checksum fmt details
  | Checksum_mismatch details -> pp_checksum_mismatch fmt details
32
(** The 32-symbol alphabet, indexed by digit value: the ten decimal digits
    followed by the lowercase letters with [i], [l], [o] and [u] left out. *)
let encoding_chars =
  let digits = "0123456789" in
  let letters = "abcdefghjkmnpqrstvwxyz" in
  digits ^ letters
34
(** [generate_checksum number] returns [98 - ((100 * number) mod 97)].

    For a non-negative [number] the result lies between 2 and 98. For a
    negative [number] the remainder follows [Int64.rem] (truncating), so
    the result is larger than 98.

    The product [100 * number] is never formed: it would silently wrap
    around for large inputs and give a wrong check value. *)
let generate_checksum number =
  (* 100 mod 97 = 3, hence (100 * n) mod 97 = (3 * (n mod 97)) mod 97.
     After the first reduction every intermediate has magnitude < 300. *)
  let reduced = Int64.rem number 97L in
  let remainder = Int64.rem (Int64.mul 3L reduced) 97L in
  Int64.sub 98L remainder
37
(** [validate number ~checksum] is [true] when [checksum] equals the value
    that [generate_checksum] computes for [number]. *)
let validate number ~checksum =
  let expected = generate_checksum number in
  Int64.equal checksum expected
40
(** [normalize str] returns a canonical copy of [str]:
    - ASCII letters are lowercased;
    - every ['-'] is dropped;
    - ['i'] and ['l'] (either case) become ['1'];
    - ['o'] (either case) becomes ['0'];
    - every other character is kept (lowercased). *)
let normalize str =
  let out = Buffer.create (String.length str) in
  let emit raw =
    match Char.lowercase_ascii raw with
    | '-' -> ()
    | 'i' | 'l' -> Buffer.add_char out '1'
    | 'o' -> Buffer.add_char out '0'
    | lowered -> Buffer.add_char out lowered
  in
  String.iter emit str;
  Buffer.contents out
56
(** [encode ?split_every ?min_length ?checksum number] renders [number] in
    base 32 using [encoding_chars], most significant digit first.

    @param split_every when positive, a ['-'] is inserted after every
      [split_every] characters of the final string (checksum digits
      included). Default [0]: no separator.
    @param min_length the digits are left-padded with ['0'] up to this
      length. When [checksum] is [true] and [min_length > 2], the two
      checksum characters count toward [min_length], so the digits are
      padded to [min_length - 2] only. Default [0]: no padding.
    @param checksum when [true], [generate_checksum number] is appended,
      printed as a decimal number with at least two digits. Default [false].
    @param number the value to encode; must not be negative.
    @raise Invalid_argument if [number] is negative. *)
let encode ?(split_every=0) ?(min_length=0) ?(checksum=false) number =
  (* The digit loop below only terminates correctly for non-negative
     values; reject anything else with a clear message instead of letting
     a string index fail. *)
  if Int64.compare number 0L < 0 then
    invalid_arg "encode: number must be non-negative";

  (* Collect base-32 digits, least significant first, consing so that the
     resulting list is most significant first. *)
  let rec digits acc n =
    if Int64.equal n 0L then acc
    else
      let d = Int64.to_int (Int64.rem n 32L) in
      digits (encoding_chars.[d] :: acc) (Int64.div n 32L)
  in
  let encoded =
    if Int64.equal number 0L then "0"
    else begin
      (* An int64 needs at most 13 base-32 digits. *)
      let buf = Buffer.create 13 in
      List.iter (Buffer.add_char buf) (digits [] number);
      Buffer.contents buf
    end
  in

  (* The checksum takes two characters out of the requested minimum. *)
  let target_length =
    if checksum && min_length > 2 then min_length - 2 else min_length
  in
  let missing = target_length - String.length encoded in
  let padded =
    if missing > 0 then String.make missing '0' ^ encoded else encoded
  in

  let full =
    if checksum then
      padded ^ Printf.sprintf "%02Ld" (generate_checksum number)
    else padded
  in

  (* A group size that covers the whole string yields a single group, so
     no separator is needed; handling it here also keeps the group-count
     arithmetic below from overflowing for very large [split_every]. *)
  let len = String.length full in
  if split_every <= 0 || split_every >= len then full
  else
    let group_count = (len + split_every - 1) / split_every in
    Array.init group_count (fun i ->
        let start = i * split_every in
        String.sub full start (min split_every (len - start)))
    |> Array.to_list
    |> String.concat "-"
112
(** [decode ?checksum str] parses [str] as a base-32 number.

    [str] is first passed through [normalize]. When [checksum] is [true],
    the last two characters of the normalized string are the checksum and
    must be decimal digits; the remaining characters are the number.

    @param checksum whether [str] ends with a two-digit checksum that must
      be verified. Default [false].
    @return the decoded number. An empty number part decodes to [0L].
    @raise Decode_error with
      - [Invalid_checksum] if [checksum] is [true] and the normalized
        string has fewer than 3 characters, or its last two characters
        are not both decimal digits;
      - [Invalid_character] if a character of the number part is not in
        [encoding_chars];
      - [Invalid_length] if the number does not fit in a signed 64-bit
        integer;
      - [Checksum_mismatch] if the checksum does not match the decoded
        number. *)
let decode ?(checksum=false) str =
  let encoded = normalize str in
  let total = String.length encoded in
  let is_decimal_digit = function '0' .. '9' -> true | _ -> false in

  (* Split off the checksum, if one is expected. *)
  let payload, declared_checksum =
    if not checksum then (encoded, None)
    else if total < 3 then
      raise (Decode_error (Invalid_checksum {
        checksum = encoded;
        message = "encoded string too short for checksum"
      }))
    else begin
      let cs_str = String.sub encoded (total - 2) 2 in
      (* [Int64.of_string] alone is too lenient: it also accepts a leading
         sign and '_' separators, so check the characters explicitly. *)
      if not (is_decimal_digit cs_str.[0] && is_decimal_digit cs_str.[1]) then
        raise (Decode_error (Invalid_checksum {
          checksum = cs_str;
          message = "invalid checksum format"
        }));
      (String.sub encoded 0 (total - 2), Some (Int64.of_string cs_str))
    end
  in

  (* Largest accumulator for which [acc * 32 + 31] still fits in int64. *)
  let limit = Int64.shift_right Int64.max_int 5 in
  let payload_len = String.length payload in
  let rec accumulate i acc =
    if i >= payload_len then acc
    else
      let c = payload.[i] in
      match String.index_opt encoding_chars c with
      | None ->
        raise (Decode_error (Invalid_character {
          char = c;
          message = Printf.sprintf "character '%c' not in base32 alphabet" c
        }))
      | Some pos ->
        if Int64.compare acc limit > 0 then
          raise (Decode_error (Invalid_length {
            length = payload_len;
            message = "decoded value does not fit in a signed 64-bit integer"
          }))
        else
          accumulate (i + 1) (Int64.add (Int64.mul acc 32L) (Int64.of_int pos))
  in
  let number = accumulate 0 0L in

  (* Verify the checksum if one was supplied. *)
  (match declared_checksum with
   | Some got ->
     if not (validate number ~checksum:got) then
       raise (Decode_error (Checksum_mismatch {
         expected = generate_checksum number;
         got;
         identifier = str
       }))
   | None -> ());

  number
163
(** [generate ~length ?split_every ?checksum ()] returns the encoding of a
    random number drawn with the [Random] module.

    @param length requested number of characters, separators excluded.
      When [checksum] is [true] this includes the two checksum characters.
    @param split_every forwarded to [encode]. Default [0].
    @param checksum whether to append a checksum. Default [false].
    @raise Decode_error with [Invalid_length] if [checksum] is [true] and
      [length < 3]. *)
let generate ~length ?(split_every=0) ?(checksum=false) () =
  if checksum && length < 3 then
    raise (Decode_error (Invalid_length {
      length;
      message = "length must be >= 3 if checksum is enabled"
    }));

  (* Number of base-32 digits available for the random value itself. *)
  let digit_count = if checksum then length - 2 else length in

  (* Exclusive upper bound for the random value: 32^digit_count = 2^(5 *
     digit_count). From 13 digits on that power no longer fits in an
     int64, so the bound is capped; [encode] zero-pads the rest. *)
  let bound =
    if digit_count >= 13 then Int64.max_int
    else Int64.shift_left 1L (5 * max 0 digit_count)
  in
  let random_num = Random.int64 bound in

  (* [encode] itself reserves two characters of [min_length] for the
     checksum, so the full [length] is passed here; subtracting the
     checksum size a second time would allow shorter results. *)
  encode ~split_every ~min_length:length ~checksum random_num
177 encode ~split_every ~min_length:adjusted_length ~checksum random_num