experimental hashing with oxcaml
1(* Speed test comparing system sha256sum with Cryptokit and oxsha implementations *)
2
(* Four-byte fill pattern (bytes 0xDE 0xAD 0xBE 0xEF).  [create_test_file]
   indexes it with [i mod 4] to fill its write buffer. *)
let deadbeef_pattern = "\xde\xad\xbe\xef"
4
(* [hex_of_bytes bytes] renders [bytes] as a lowercase hexadecimal string,
   two characters per input byte (high nibble first).  An empty input yields
   the empty string. *)
let hex_of_bytes bytes =
  let digits = "0123456789abcdef" in
  String.init (2 * Bytes.length bytes) (fun pos ->
    let byte = Char.code (Bytes.get bytes (pos / 2)) in
    (* Even output positions carry the high nibble, odd ones the low nibble. *)
    let nibble = if pos land 1 = 0 then byte lsr 4 else byte land 0x0f in
    digits.[nibble])
12
(* [create_test_file filename size] writes a [size]-byte binary file at
   [filename] whose content is [deadbeef_pattern] repeated (the final
   repetition is cut short when [size] is not a multiple of 4).  Progress is
   reported on stdout.

   Data is written in 1 MB chunks so only one chunk is held in memory.  The
   channel is closed even when a write fails part-way (e.g. disk full); the
   exception raised by the failed write still propagates to the caller. *)
let create_test_file filename size =
  Printf.printf "Creating %s (%d bytes = %.2f GB)...\n%!"
    filename size (float_of_int size /. (1024.0 *. 1024.0 *. 1024.0));

  let chunk_size = 1024 * 1024 in (* 1 MB chunks *)
  (* [chunk_size] is a multiple of the pattern length, so back-to-back chunks
     continue the pattern without a seam. *)
  let chunk = Bytes.init chunk_size (fun i -> deadbeef_pattern.[i mod 4]) in
  let chunks = size / chunk_size in
  let remainder = size mod chunk_size in

  Out_channel.with_open_bin filename (fun oc ->
    for i = 0 to chunks - 1 do
      Out_channel.output_bytes oc chunk;
      (* Only report every 100th chunk to keep terminal output cheap. *)
      if i mod 100 = 0 then
        Printf.printf "\rProgress: %.1f%%...%!"
          (float_of_int i *. 100.0 /. float_of_int chunks)
    done;
    (* Trailing partial chunk, when [size] is not a multiple of 1 MB. *)
    if remainder > 0 then Out_channel.output oc chunk 0 remainder);

  Printf.printf "\rProgress: 100.0%%... Done!\n%!"
44
(* SHA-256 using Cryptokit.

   Hashes the whole content of [filename] (opened in binary mode) with
   [Cryptokit.Hash.sha256] and returns the digest encoded with
   [Cryptokit.Hexa.encode]. *)
let sha256sum_cryptokit filename =
  let hasher = Cryptokit.Hash.sha256 () in
  let raw_digest =
    In_channel.with_open_bin filename (fun ic ->
      Cryptokit.hash_channel hasher ic)
  in
  Cryptokit.transform_string (Cryptokit.Hexa.encode ()) raw_digest
57
(* SHA-256 using the system [sha256sum] command.

   Runs [sha256sum] on the shell-quoted [filename] and returns the first 64
   characters of its first output line, i.e. the hex digest.

   @raise Failure if the command does not exit with status 0 or does not
   print a line of at least 64 characters (for instance when [sha256sum] is
   not installed or the file cannot be read). *)
let sha256sum_system filename =
  let cmd = Printf.sprintf "sha256sum %s" (Filename.quote filename) in
  let ic = Unix.open_process_in cmd in
  (* [In_channel.input_line] returns [None] on empty output instead of raising
     [End_of_file], so the child process is always reaped just below. *)
  let first_line = In_channel.input_line ic in
  let status = Unix.close_process_in ic in
  match status, first_line with
  | Unix.WEXITED 0, Some line when String.length line >= 64 ->
    (* sha256sum outputs: "<hash> <filename>" *)
    String.sub line 0 64
  | Unix.WEXITED 0, (Some _ | None) ->
    failwith "sha256sum_system: sha256sum produced no digest line"
  | Unix.WEXITED code, _ ->
    failwith
      (Printf.sprintf "sha256sum_system: sha256sum exited with status %d" code)
  | (Unix.WSIGNALED signal | Unix.WSTOPPED signal), _ ->
    failwith
      (Printf.sprintf "sha256sum_system: sha256sum interrupted by signal %d"
         signal)
67
(* SHA-256 using oxsha with Unix.map_file.

   Opens [filename] read-only, maps its whole content as a non-shared
   one-dimensional char Bigarray, and passes that to [Oxsha.hash]; the digest
   is returned as a hex string via [hex_of_bytes].  A zero-length file is
   not mapped; [Oxsha.hash_string ""] is used for it instead.

   The file descriptor is closed before hashing starts, and it is also closed
   when [Unix.fstat] or [Unix.map_file] raises. *)
let sha256sum_oxsha filename =
  let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0 in
  let mapping =
    Fun.protect
      ~finally:(fun () -> Unix.close fd)
      (fun () ->
        match (Unix.fstat fd).Unix.st_size with
        | 0 ->
          (* Handle empty files *)
          None
        | file_size ->
          let mapped =
            Unix.map_file fd Bigarray.char Bigarray.c_layout false
              [| file_size |]
          in
          Some (Bigarray.array1_of_genarray mapped))
  in
  let digest =
    match mapping with
    | None -> Oxsha.hash_string ""
    | Some ba -> Oxsha.hash ba
  in
  hex_of_bytes digest
89
(* [time_function name f] runs [f ()] and measures how long it takes using
   [Unix.gettimeofday].  It prints a start line and a completion line (with
   the elapsed seconds) to stdout and returns [(result, elapsed_seconds)]. *)
let time_function name f =
  let now = Unix.gettimeofday in
  Printf.printf "\nRunning %s...\n%!" name;
  let started_at = now () in
  let outcome = f () in
  let seconds = now () -. started_at in
  Printf.printf "%s completed in %.3f seconds\n%!" name seconds;
  outcome, seconds
98
(* Benchmark driver.

   Creates the test file if it is missing, hashes it with the system
   [sha256sum], Cryptokit and oxsha (in that order), prints timings and
   throughput for each, reports the fastest, and exits with status 1 if the
   three digests disagree.  The test file is left on disk for later runs. *)
let () =
  let test_file = "test_2gb.bin" in
  let file_size = 2 * 1024 * 1024 * 1024 in (* 2 GB *)

  Printf.printf "=== SHA-256 Speed Test ===\n\n";

  (* Create test file if it doesn't exist *)
  if not (Sys.file_exists test_file) then
    create_test_file test_file file_size
  else
    Printf.printf "Test file %s already exists, using existing file.\n%!" test_file;

  (* A reused file is not guaranteed to have the requested size (for example
     after an interrupted creation), so throughput is derived from the size
     actually on disk rather than from [file_size]. *)
  let actual_size = (Unix.stat test_file).Unix.st_size in
  if actual_size <> file_size then
    Printf.printf
      "Warning: %s is %d bytes, not the expected %d; throughput uses the actual size.\n%!"
      test_file actual_size file_size;

  (* Time one implementation on the test file and echo the digest it
     produced. *)
  let bench label hash_file =
    let (hash, seconds) = time_function label (fun () -> hash_file test_file) in
    Printf.printf "Hash: %s\n" hash;
    (hash, seconds)
  in

  (* Test system sha256sum *)
  let (hash_system, time_system) = bench "system sha256sum" sha256sum_system in

  (* Test Cryptokit implementation *)
  let (hash_cryptokit, time_cryptokit) =
    bench "Cryptokit sha256sum" sha256sum_cryptokit
  in

  (* Test oxsha implementation *)
  let (hash_oxsha, time_oxsha) = bench "oxsha (mmap)" sha256sum_oxsha in

  (* Compare results *)
  let mb_per_s seconds =
    float_of_int actual_size /. seconds /. (1024.0 *. 1024.0)
  in
  Printf.printf "\n=== Results ===\n";
  Printf.printf "System sha256sum: %.3f seconds (%.2f MB/s)\n"
    time_system (mb_per_s time_system);
  Printf.printf "Cryptokit sha256sum: %.3f seconds (%.2f MB/s)\n"
    time_cryptokit (mb_per_s time_cryptokit);
  Printf.printf "oxsha (mmap): %.3f seconds (%.2f MB/s)\n"
    time_oxsha (mb_per_s time_oxsha);

  (* Find fastest: fold over the remaining entries starting from the first,
     keeping the earlier entry on ties. *)
  let first = ("System sha256sum", time_system) in
  let rest = [
    ("Cryptokit", time_cryptokit);
    ("oxsha (mmap)", time_oxsha)
  ] in
  let times = first :: rest in
  let fastest_name, fastest_time =
    List.fold_left
      (fun (n, t) (n', t') -> if t' < t then (n', t') else (n, t))
      first rest
  in
  Printf.printf "\nFastest: %s\n" fastest_name;
  List.iter (fun (name, time) ->
    if name <> fastest_name then
      Printf.printf " %s is %.2fx faster than %s\n"
        fastest_name (time /. fastest_time) name
  ) times;

  (* Verify hashes match; compare case-insensitively because the hex casing
     of each tool's output is not controlled here. *)
  let hash_system_lower = String.lowercase_ascii hash_system in
  let hash_cryptokit_lower = String.lowercase_ascii hash_cryptokit in
  let hash_oxsha_lower = String.lowercase_ascii hash_oxsha in

  if String.equal hash_system_lower hash_cryptokit_lower
     && String.equal hash_system_lower hash_oxsha_lower
  then
    Printf.printf "\n✓ All hashes match!\n"
  else (
    Printf.printf "\n✗ ERROR: Hashes do not match!\n";
    Printf.printf " System: %s\n" hash_system;
    Printf.printf " Cryptokit: %s\n" hash_cryptokit;
    Printf.printf " oxsha: %s\n" hash_oxsha;
    exit 1
  );

  Printf.printf "\nNote: Test file %s has been preserved for future runs.\n" test_file;
  Printf.printf " Delete it manually if you want to recreate it.\n"