OCaml library for Crockford's Base32
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy. All rights reserved.
3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5
6type invalid_length = { length: int; message: string }
7type invalid_character = { char: char; message: string }
8type invalid_checksum = { checksum: string; message: string }
9type checksum_mismatch = { expected: int64; got: int64; identifier: string }
10
11type decode_error =
12 | Invalid_length of invalid_length
13 | Invalid_character of invalid_character
14 | Invalid_checksum of invalid_checksum
15 | Checksum_mismatch of checksum_mismatch
16
17exception Decode_error of decode_error
18
19let pp_invalid_length fmt { length; message } =
20 Format.fprintf fmt "Invalid_length: length=%d, %s" length message
21
22let pp_invalid_character fmt { char; message } =
23 Format.fprintf fmt "Invalid_character: char='%c', %s" char message
24
25let pp_invalid_checksum fmt { checksum; message } =
26 Format.fprintf fmt "Invalid_checksum: checksum=%s, %s" checksum message
27
28let pp_checksum_mismatch fmt { expected; got; identifier } =
29 Format.fprintf fmt "Checksum_mismatch: expected=%Ld, got=%Ld, identifier=%s"
30 expected got identifier
31
32let pp_decode_error fmt = function
33 | Invalid_length e -> pp_invalid_length fmt e
34 | Invalid_character e -> pp_invalid_character fmt e
35 | Invalid_checksum e -> pp_invalid_checksum fmt e
36 | Checksum_mismatch e -> pp_checksum_mismatch fmt e
37
38let encoding_chars = "0123456789abcdefghjkmnpqrstvwxyz"
39
40let generate_checksum number =
41 Int64.(sub (add (sub 97L (rem (mul 100L number) 97L)) 1L) 0L)
42
43let validate number ~checksum =
44 Int64.equal checksum (generate_checksum number)
45
46let normalize str =
47 let len = String.length str in
48 let buf = Bytes.create len in
49 let rec process i j =
50 if i >= len then Bytes.sub_string buf 0 j
51 else
52 let c = String.get str i in
53 let c_lower = Char.lowercase_ascii c in
54 match c_lower with
55 | '-' -> process (i + 1) j
56 | 'i' | 'l' -> Bytes.set buf j '1'; process (i + 1) (j + 1)
57 | 'o' -> Bytes.set buf j '0'; process (i + 1) (j + 1)
58 | _ -> Bytes.set buf j c_lower; process (i + 1) (j + 1)
59 in
60 process 0 0
61
62let encode ?(split_every=0) ?(min_length=0) ?(checksum=false) number =
63 let original_number = number in
64
65 (* Build base32 encoding *)
66 let rec build_encoding acc n =
67 if Int64.equal n 0L then acc
68 else
69 let remainder = Int64.to_int (Int64.rem n 32L) in
70 let n' = Int64.div n 32L in
71 build_encoding (encoding_chars.[remainder] :: acc) n'
72 in
73
74 let encoded_list =
75 if Int64.equal number 0L then ['0']
76 else build_encoding [] number
77 in
78
79 let encoded_str = String.concat "" (List.map (String.make 1) encoded_list) in
80
81 (* Adjust min_length if checksum is enabled *)
82 let adjusted_length =
83 if checksum && min_length > 2 then min_length - 2
84 else min_length
85 in
86
87 (* Pad with zeros if needed *)
88 let padded =
89 if adjusted_length > 0 && String.length encoded_str < adjusted_length then
90 String.make (adjusted_length - String.length encoded_str) '0' ^ encoded_str
91 else
92 encoded_str
93 in
94
95 (* Add checksum *)
96 let with_checksum =
97 if checksum then
98 let cs = generate_checksum original_number in
99 padded ^ Printf.sprintf "%02Ld" cs
100 else
101 padded
102 in
103
104 (* Split if requested *)
105 if split_every > 0 then
106 let len = String.length with_checksum in
107 let num_splits = (len + split_every - 1) / split_every in
108 let splits = Array.make num_splits "" in
109 for i = 0 to num_splits - 1 do
110 let start = i * split_every in
111 let chunk_len = min split_every (len - start) in
112 splits.(i) <- String.sub with_checksum start chunk_len
113 done;
114 String.concat "-" (Array.to_list splits)
115 else
116 with_checksum
117
118let decode ?(checksum=false) str =
119 let encoded = normalize str in
120
121 let (encoded_part, checksum_value) =
122 if checksum then begin
123 if String.length encoded < 3 then
124 raise (Decode_error (Invalid_checksum {
125 checksum = encoded;
126 message = "encoded string too short for checksum"
127 }));
128
129 let cs_str = String.sub encoded (String.length encoded - 2) 2 in
130 let cs =
131 try Int64.of_string cs_str
132 with Failure _ ->
133 raise (Decode_error (Invalid_checksum {
134 checksum = cs_str;
135 message = "invalid checksum format"
136 }))
137 in
138 (String.sub encoded 0 (String.length encoded - 2), Some cs)
139 end else
140 (encoded, None)
141 in
142
143 (* Decode base32 *)
144 let number = ref 0L in
145 String.iter (fun c ->
146 number := Int64.mul !number 32L;
147 match String.index_opt encoding_chars c with
148 | Some pos -> number := Int64.add !number (Int64.of_int pos)
149 | None ->
150 raise (Decode_error (Invalid_character {
151 char = c;
152 message = Printf.sprintf "character '%c' not in base32 alphabet" c
153 }))
154 ) encoded_part;
155
156 (* Validate checksum if present *)
157 (match checksum_value with
158 | Some cs ->
159 if not (validate !number ~checksum:cs) then
160 raise (Decode_error (Checksum_mismatch {
161 expected = generate_checksum !number;
162 got = cs;
163 identifier = str
164 }))
165 | None -> ());
166
167 !number
168
169let generate ~length ?(split_every=0) ?(checksum=false) ?(rng=Random.float) () =
170 if checksum && length < 3 then
171 raise (Decode_error (Invalid_length {
172 length;
173 message = "length must be >= 3 if checksum is enabled"
174 }));
175
176 let adjusted_length = if checksum then length - 2 else length in
177
178 (* Generate random number between 0 and 32^length *)
179 let max_val = 32.0 ** float_of_int adjusted_length in
180 let random_num = Int64.of_float (rng max_val) in
181
182 encode ~split_every ~min_length:adjusted_length ~checksum random_num