···
Spawns variable number of echo servers on random ports, then exercises
the connection pool with multiple parallel client fibers.
10
+
Collects detailed event traces for visualization.
12
-
let src = Logs.Src.create "stress_test" ~doc:"Connection pool stress test"
13
-
module Log = (val Logs.src_log src : Logs.LOG)
(** Configuration for the stress test *)
15
+
name : string; (** Test name for identification *)
num_servers : int; (** Number of echo servers to spawn *)
num_clients : int; (** Number of client connections per server *)
messages_per_client : int; (** Number of messages each client sends *)
···
···
34
+
(** Test presets for different scenarios *)
36
+
(* High connection reuse - few connections, many messages *)
37
+
{ name = "high_reuse";
40
+
messages_per_client = 50;
41
+
max_parallel_clients = 10;
45
+
(* Many endpoints - test endpoint scaling *)
46
+
{ name = "many_endpoints";
49
+
messages_per_client = 10;
50
+
max_parallel_clients = 50;
54
+
(* High concurrency - stress parallel connections *)
55
+
{ name = "high_concurrency";
58
+
messages_per_client = 5;
59
+
max_parallel_clients = 100;
63
+
(* Large messages - test throughput *)
64
+
{ name = "large_messages";
67
+
messages_per_client = 20;
68
+
max_parallel_clients = 30;
69
+
message_size = 1024;
72
+
(* Constrained pool - force queuing *)
73
+
{ name = "constrained_pool";
76
+
messages_per_client = 10;
77
+
max_parallel_clients = 50;
81
+
(* Burst traffic - many clients, few messages each *)
82
+
{ name = "burst_traffic";
85
+
messages_per_client = 2;
86
+
max_parallel_clients = 100;
92
+
(** Extended stress test - 100x messages, 10x clients/servers *)
93
+
let extended_preset = {
94
+
name = "extended_stress";
97
+
messages_per_client = 100;
98
+
max_parallel_clients = 500;
(** Statistics collected during test *)
36
-
mutable total_connections : int;
37
-
mutable total_messages : int;
38
-
mutable total_bytes : int;
39
-
mutable errors : int;
40
-
mutable min_latency : float;
41
-
mutable max_latency : float;
42
-
mutable total_latency : float;
104
+
type latency_stats = {
105
+
mutable count : int;
106
+
mutable total : float;
107
+
mutable min : float;
108
+
mutable max : float;
45
-
let create_stats () = {
46
-
total_connections = 0;
50
-
min_latency = Float.infinity;
52
-
total_latency = 0.0;
111
+
let create_latency_stats () = {
114
+
min = Float.infinity;
let update_latency stats latency =
56
-
stats.min_latency <- min stats.min_latency latency;
57
-
stats.max_latency <- max stats.max_latency latency;
58
-
stats.total_latency <- stats.total_latency +. latency
119
+
stats.count <- stats.count + 1;
120
+
stats.total <- stats.total +. latency;
121
+
stats.min <- min stats.min latency;
122
+
stats.max <- max stats.max latency
(** Generate a random message of given size *)
let generate_message size =
···
String.init size (fun _ -> chars.[Random.int len])
(** Echo server handler - echoes back everything it receives *)
67
-
let handle_echo_client flow addr =
68
-
Log.debug (fun m -> m "Echo server: accepted connection from %a"
69
-
Eio.Net.Sockaddr.pp addr);
131
+
let handle_echo_client flow _addr =
let buf = Cstruct.create 4096 in
match Eio.Flow.single_read flow buf with
···
let data = Cstruct.sub buf 0 n in
Eio.Flow.write flow [data];
77
-
| exception End_of_file ->
78
-
Log.debug (fun m -> m "Echo server: client disconnected from %a"
79
-
Eio.Net.Sockaddr.pp addr)
139
+
| exception End_of_file -> ()
(** Start an echo server on a random port, returns the port number *)
let start_echo_server ~sw net =
85
-
(* Listen on port 0 to get a random available port *)
let addr = `Tcp (Eio.Net.Ipaddr.V4.loopback, 0) in
let listening_socket = Eio.Net.listen net ~sw ~backlog:128 ~reuse_addr:true addr in
89
-
(* Get the actual port assigned *)
let actual_addr = Eio.Net.listening_addr listening_socket in
let port = match actual_addr with
| _ -> failwith "Expected TCP address"
96
-
Log.info (fun m -> m "Echo server started on port %d" port);
98
-
(* Start accepting connections in a daemon fiber.
99
-
The daemon runs until cancelled when the switch finishes. *)
Eio.Fiber.fork_daemon ~sw (fun () ->
Eio.Net.accept_fork ~sw listening_socket
104
-
~on_error:(fun ex ->
105
-
Log.warn (fun m -> m "Echo server error: %a" Fmt.exn ex))
157
+
~on_error:(fun _ -> ())
···
(** Client test: connect via pool, send message, verify echo *)
116
-
let run_client_test ~clock pool endpoint message test_stats =
168
+
let run_client_test ~clock ~collector pool endpoint endpoint_id message client_id latency_stats errors =
let msg_len = String.length message in
let start_time = Eio.Time.now clock in
172
+
(* Get or create connection ID for tracking *)
173
+
let conn_id = Trace.next_connection_id collector in
Conpool.with_connection pool endpoint (fun flow ->
177
+
(* Record acquire event *)
178
+
Trace.record collector ~clock ~event_type:Trace.Connection_acquired
179
+
~endpoint_id ~connection_id:conn_id ~client_id ();
Eio.Flow.copy_string message flow;
124
-
Eio.Flow.copy_string "\n" flow; (* delimiter *)
183
+
Eio.Flow.copy_string "\n" flow;
184
+
Trace.record collector ~clock ~event_type:Trace.Message_sent
185
+
~endpoint_id ~connection_id:conn_id ~client_id ();
let response = Eio.Buf_read.of_flow flow ~max_size:(msg_len + 1) in
let echoed = Eio.Buf_read.line response in
190
+
Trace.record collector ~clock ~event_type:Trace.Message_received
191
+
~endpoint_id ~connection_id:conn_id ~client_id ();
let end_time = Eio.Time.now clock in
131
-
let latency = end_time -. start_time in
194
+
let latency = (end_time -. start_time) *. 1000.0 in (* Convert to ms *)
if String.equal echoed message then begin
134
-
test_stats.total_messages <- test_stats.total_messages + 1;
135
-
test_stats.total_bytes <- test_stats.total_bytes + msg_len;
136
-
update_latency test_stats latency;
137
-
Log.debug (fun m -> m "Client: echoed %d bytes in %.3fms"
138
-
msg_len (latency *. 1000.0))
197
+
update_latency latency_stats latency;
198
+
Trace.record collector ~clock ~event_type:Trace.Message_verified
199
+
~endpoint_id ~connection_id:conn_id ~client_id ()
140
-
test_stats.errors <- test_stats.errors + 1;
141
-
Log.err (fun m -> m "Client: echo mismatch! sent=%S got=%S" message echoed)
144
-
test_stats.total_connections <- test_stats.total_connections + 1
202
+
Trace.record collector ~clock ~event_type:(Trace.Connection_error "echo_mismatch")
203
+
~endpoint_id ~connection_id:conn_id ~client_id ()
206
+
(* Record release event *)
207
+
Trace.record collector ~clock ~event_type:Trace.Connection_released
208
+
~endpoint_id ~connection_id:conn_id ~client_id ()
146
-
test_stats.errors <- test_stats.errors + 1;
147
-
Log.err (fun m -> m "Client error: %a" Fmt.exn ex)
212
+
Trace.record collector ~clock ~event_type:(Trace.Connection_error (Printexc.to_string ex))
213
+
~endpoint_id ~connection_id:conn_id ~client_id ()
(** Run a single client that sends multiple messages *)
150
-
let run_client ~clock pool endpoints config test_stats client_id =
151
-
Log.debug (fun m -> m "Starting client %d" client_id);
153
-
for msg_num = 1 to config.messages_per_client do
154
-
(* Pick a random endpoint *)
216
+
let run_client ~clock ~collector pool endpoints config latency_stats errors client_id =
217
+
for _ = 1 to config.messages_per_client do
let endpoint_idx = Random.int (Array.length endpoints) in
let endpoint = endpoints.(endpoint_idx) in
158
-
(* Generate unique message *)
159
-
let message = Printf.sprintf "client%d-msg%d-%s"
160
-
client_id msg_num (generate_message config.message_size) in
162
-
run_client_test ~clock pool endpoint message test_stats
165
-
Log.debug (fun m -> m "Client %d completed" client_id)
220
+
let message = Printf.sprintf "c%d-%s" client_id (generate_message config.message_size) in
221
+
run_client_test ~clock ~collector pool endpoint endpoint_idx message client_id latency_stats errors
167
-
(** Main stress test runner *)
168
-
let run_stress_test ~env config =
224
+
(** Main stress test runner - returns a test trace *)
225
+
let run_stress_test ~env config : Trace.test_trace =
let net = Eio.Stdenv.net env in
let clock = Eio.Stdenv.clock env in
172
-
Log.info (fun m -> m "=== Stress Test Configuration ===");
173
-
Log.info (fun m -> m "Servers: %d" config.num_servers);
174
-
Log.info (fun m -> m "Clients per server: %d" config.num_clients);
175
-
Log.info (fun m -> m "Messages per client: %d" config.messages_per_client);
176
-
Log.info (fun m -> m "Max parallel clients: %d" config.max_parallel_clients);
177
-
Log.info (fun m -> m "Message size: %d bytes" config.message_size);
178
-
Log.info (fun m -> m "Pool size per endpoint: %d" config.pool_size);
180
-
(* Use a sub-switch for servers so we can cancel them when done *)
181
-
let test_passed = ref false in
182
-
let expected_messages = ref 0 in
184
-
Eio.Switch.run @@ fun sw ->
185
-
(* Start echo servers *)
186
-
Log.info (fun m -> m "Starting %d echo servers..." config.num_servers);
187
-
let ports = Array.init config.num_servers (fun _ ->
188
-
start_echo_server ~sw net
229
+
let collector = Trace.create_collector () in
230
+
let latency_stats = create_latency_stats () in
231
+
let errors = ref 0 in
232
+
let ports = ref [||] in
191
-
(* Small delay to ensure servers are ready *)
192
-
Eio.Time.sleep clock 0.1;
234
+
let trace_config : Trace.test_config = {
235
+
num_servers = config.num_servers;
236
+
num_clients = config.num_clients;
237
+
messages_per_client = config.messages_per_client;
238
+
max_parallel_clients = config.max_parallel_clients;
239
+
message_size = config.message_size;
240
+
pool_size = config.pool_size;
194
-
(* Create endpoints for all servers *)
195
-
let endpoints = Array.map (fun port ->
196
-
Conpool.Endpoint.make ~host:"127.0.0.1" ~port
243
+
let start_unix_time = Unix.gettimeofday () in
199
-
Log.info (fun m -> m "Servers ready on ports: %s"
200
-
(String.concat ", " (Array.to_list (Array.map string_of_int ports))));
245
+
let result = ref None in
202
-
(* Create connection pool *)
203
-
let pool_config = Conpool.Config.make
204
-
~max_connections_per_endpoint:config.pool_size
205
-
~max_idle_time:30.0
206
-
~max_connection_lifetime:120.0
207
-
~connect_timeout:5.0
208
-
~connect_retry_count:3
249
+
Eio.Switch.run @@ fun sw ->
250
+
(* Start echo servers *)
251
+
ports := Array.init config.num_servers (fun _ ->
252
+
start_echo_server ~sw net
212
-
let pool = Conpool.create ~sw ~net ~clock ~config:pool_config () in
213
-
Log.info (fun m -> m "Connection pool created");
255
+
Eio.Time.sleep clock 0.05;
215
-
(* Initialize test statistics *)
216
-
let test_stats = create_stats () in
257
+
let endpoints = Array.map (fun port ->
258
+
Conpool.Endpoint.make ~host:"127.0.0.1" ~port
218
-
(* Calculate total clients *)
219
-
let total_clients = config.num_servers * config.num_clients in
220
-
expected_messages := total_clients * config.messages_per_client;
221
-
Log.info (fun m -> m "Running %d total clients..." total_clients);
261
+
(* Create connection pool with hooks to track events *)
262
+
let pool_config = Conpool.Config.make
263
+
~max_connections_per_endpoint:config.pool_size
264
+
~max_idle_time:30.0
265
+
~max_connection_lifetime:120.0
266
+
~connect_timeout:5.0
267
+
~connect_retry_count:3
268
+
~on_connection_created:(fun ep ->
269
+
let port = Conpool.Endpoint.port ep in
270
+
let endpoint_id = Array.to_list !ports
271
+
|> List.mapi (fun i p -> (i, p))
272
+
|> List.find (fun (_, p) -> p = port)
274
+
let conn_id = Trace.next_connection_id collector in
275
+
Trace.record collector ~clock ~event_type:Trace.Connection_created
276
+
~endpoint_id ~connection_id:conn_id ()
278
+
~on_connection_reused:(fun ep ->
279
+
let port = Conpool.Endpoint.port ep in
280
+
let endpoint_id = Array.to_list !ports
281
+
|> List.mapi (fun i p -> (i, p))
282
+
|> List.find (fun (_, p) -> p = port)
284
+
let conn_id = Trace.next_connection_id collector in
285
+
Trace.record collector ~clock ~event_type:Trace.Connection_reused
286
+
~endpoint_id ~connection_id:conn_id ()
288
+
~on_connection_closed:(fun ep ->
289
+
let port = Conpool.Endpoint.port ep in
290
+
let endpoint_id = Array.to_list !ports
291
+
|> List.mapi (fun i p -> (i, p))
292
+
|> List.find (fun (_, p) -> p = port)
294
+
let conn_id = Trace.next_connection_id collector in
295
+
Trace.record collector ~clock ~event_type:Trace.Connection_closed
296
+
~endpoint_id ~connection_id:conn_id ()
223
-
let start_time = Eio.Time.now clock in
301
+
let pool = Conpool.create ~sw ~net ~clock ~config:pool_config () in
225
-
(* Run clients in parallel using Fiber.List *)
226
-
let client_ids = List.init total_clients (fun i -> i) in
227
-
Eio.Fiber.List.iter ~max_fibers:config.max_parallel_clients
229
-
run_client ~clock pool endpoints config test_stats client_id)
303
+
(* Record start time *)
304
+
let start_time = Eio.Time.now clock in
305
+
Trace.set_start_time collector start_time;
232
-
let end_time = Eio.Time.now clock in
233
-
let total_time = end_time -. start_time in
307
+
(* Run clients in parallel *)
308
+
let total_clients = config.num_servers * config.num_clients in
309
+
let client_ids = List.init total_clients (fun i -> i) in
310
+
Eio.Fiber.List.iter ~max_fibers:config.max_parallel_clients
312
+
run_client ~clock ~collector pool endpoints config latency_stats errors client_id)
235
-
(* Print results *)
236
-
Log.info (fun m -> m "");
237
-
Log.info (fun m -> m "=== Test Results ===");
238
-
Log.info (fun m -> m "Total time: %.3fs" total_time);
239
-
Log.info (fun m -> m "Total connections: %d" test_stats.total_connections);
240
-
Log.info (fun m -> m "Total messages: %d" test_stats.total_messages);
241
-
Log.info (fun m -> m "Total bytes transferred: %d" test_stats.total_bytes);
242
-
Log.info (fun m -> m "Errors: %d" test_stats.errors);
315
+
let end_time = Eio.Time.now clock in
316
+
let duration = end_time -. start_time in
244
-
if test_stats.total_messages > 0 then begin
245
-
let avg_latency = test_stats.total_latency /.
246
-
float_of_int test_stats.total_messages in
247
-
Log.info (fun m -> m "Latency (min/avg/max): %.3fms / %.3fms / %.3fms"
248
-
(test_stats.min_latency *. 1000.0)
249
-
(avg_latency *. 1000.0)
250
-
(test_stats.max_latency *. 1000.0));
251
-
Log.info (fun m -> m "Throughput: %.1f messages/sec"
252
-
(float_of_int test_stats.total_messages /. total_time));
253
-
Log.info (fun m -> m "Bandwidth: %.1f KB/sec"
254
-
(float_of_int test_stats.total_bytes /. total_time /. 1024.0))
319
+
let events = Trace.get_events collector in
320
+
let endpoint_summaries = Trace.compute_endpoint_summaries events config.num_servers !ports in
257
-
(* Print pool statistics for each endpoint *)
258
-
Log.info (fun m -> m "");
259
-
Log.info (fun m -> m "=== Pool Statistics ===");
260
-
Array.iteri (fun i endpoint ->
261
-
let stats = Conpool.stats pool endpoint in
262
-
Log.info (fun m -> m "Endpoint %d (port %d):" i ports.(i));
263
-
Log.info (fun m -> m " Active: %d, Idle: %d"
264
-
(Conpool.Stats.active stats) (Conpool.Stats.idle stats));
265
-
Log.info (fun m -> m " Created: %d, Reused: %d, Closed: %d, Errors: %d"
266
-
(Conpool.Stats.total_created stats)
267
-
(Conpool.Stats.total_reused stats)
268
-
(Conpool.Stats.total_closed stats)
269
-
(Conpool.Stats.errors stats))
323
+
Trace.test_name = config.name;
324
+
config = trace_config;
325
+
start_time = start_unix_time;
328
+
endpoint_summaries;
329
+
total_messages = latency_stats.count;
330
+
total_errors = !errors;
331
+
throughput = float_of_int latency_stats.count /. duration;
332
+
avg_latency = if latency_stats.count > 0
333
+
then latency_stats.total /. float_of_int latency_stats.count
335
+
min_latency = if latency_stats.count > 0 then latency_stats.min else 0.0;
336
+
max_latency = latency_stats.max;
272
-
(* Verify success *)
273
-
test_passed := test_stats.errors = 0 &&
274
-
test_stats.total_messages = !expected_messages;
339
+
Eio.Switch.fail sw Exit
276
-
if !test_passed then
277
-
Log.info (fun m -> m "TEST PASSED: All %d messages echoed successfully!"
278
-
!expected_messages)
280
-
Log.err (fun m -> m "TEST FAILED: Expected %d messages, got %d with %d errors"
281
-
!expected_messages test_stats.total_messages test_stats.errors);
345
+
| None -> failwith "Test failed to produce result"
283
-
(* Cancel the switch to stop servers and exit cleanly *)
284
-
Eio.Switch.fail sw Exit
347
+
(** Run all preset tests and return traces *)
348
+
let run_all_presets ~env =
349
+
List.map (fun config ->
350
+
Printf.eprintf "Running test: %s\n%!" config.name;
351
+
run_stress_test ~env config
(** Parse command line arguments *)
287
-
let parse_config () =
361
+
let parse_args () =
362
+
let mode = ref (Single default_config) in
363
+
let name = ref default_config.name in
let num_servers = ref default_config.num_servers in
let num_clients = ref default_config.num_clients in
let messages_per_client = ref default_config.messages_per_client in
let max_parallel = ref default_config.max_parallel_clients in
let message_size = ref default_config.message_size in
let pool_size = ref default_config.pool_size in
294
-
let verbose = ref false in
370
+
let output_file = ref "stress_test_results.json" in
297
-
("-s", Arg.Set_int num_servers,
298
-
Printf.sprintf "Number of echo servers (default: %d)" default_config.num_servers);
299
-
("-c", Arg.Set_int num_clients,
300
-
Printf.sprintf "Clients per server (default: %d)" default_config.num_clients);
301
-
("-m", Arg.Set_int messages_per_client,
302
-
Printf.sprintf "Messages per client (default: %d)" default_config.messages_per_client);
303
-
("-p", Arg.Set_int max_parallel,
304
-
Printf.sprintf "Max parallel clients (default: %d)" default_config.max_parallel_clients);
305
-
("-b", Arg.Set_int message_size,
306
-
Printf.sprintf "Message size in bytes (default: %d)" default_config.message_size);
307
-
("-P", Arg.Set_int pool_size,
308
-
Printf.sprintf "Pool size per endpoint (default: %d)" default_config.pool_size);
309
-
("-v", Arg.Set verbose, "Enable verbose/debug logging");
373
+
("--all", Arg.Unit (fun () -> mode := AllPresets),
374
+
"Run all preset test configurations");
375
+
("--extended", Arg.Unit (fun () -> mode := Extended),
376
+
"Run extended stress test (30 servers, 1000 clients, 100 msgs each = 3M messages)");
377
+
("--list", Arg.Unit (fun () -> mode := ListPresets),
378
+
"List available presets");
379
+
("--preset", Arg.String (fun p ->
380
+
match List.find_opt (fun c -> c.name = p) presets with
381
+
| Some c -> mode := Single c
382
+
| None -> failwith (Printf.sprintf "Unknown preset: %s" p)),
383
+
"Use a named preset configuration");
384
+
("-n", Arg.Set_string name, "Test name");
385
+
("-s", Arg.Set_int num_servers, Printf.sprintf "Number of servers (default: %d)" default_config.num_servers);
386
+
("-c", Arg.Set_int num_clients, Printf.sprintf "Clients per server (default: %d)" default_config.num_clients);
387
+
("-m", Arg.Set_int messages_per_client, Printf.sprintf "Messages per client (default: %d)" default_config.messages_per_client);
388
+
("-p", Arg.Set_int max_parallel, Printf.sprintf "Max parallel clients (default: %d)" default_config.max_parallel_clients);
389
+
("-b", Arg.Set_int message_size, Printf.sprintf "Message size (default: %d)" default_config.message_size);
390
+
("-P", Arg.Set_int pool_size, Printf.sprintf "Pool size per endpoint (default: %d)" default_config.pool_size);
391
+
("-o", Arg.Set_string output_file, "Output JSON file (default: stress_test_results.json)");
312
-
let usage = "Usage: stress_test [options]" in
394
+
let usage = "Usage: stress_test [options]\n\nOptions:" in
Arg.parse specs (fun _ -> ()) usage;
315
-
(* Configure logging *)
316
-
Logs.set_reporter (Logs_fmt.reporter ());
318
-
Logs.set_level (Some Logs.Debug)
320
-
Logs.set_level (Some Logs.Info);
num_servers = !num_servers;
num_clients = !num_clients;
messages_per_client = !messages_per_client;
max_parallel_clients = !max_parallel;
message_size = !message_size;
407
+
(!mode, config, !output_file)
333
-
let config = parse_config () in
334
-
Eio_main.run @@ fun env ->
335
-
(* Catch Exit which is used to signal clean shutdown *)
336
-
try run_stress_test ~env config
411
+
let (mode, custom_config, output_file) = parse_args () in
415
+
Printf.printf "Available presets:\n";
416
+
List.iter (fun c ->
417
+
Printf.printf " %s: %d servers, %d clients, %d msgs/client, pool=%d\n"
418
+
c.name c.num_servers c.num_clients c.messages_per_client c.pool_size
422
+
let config = if config.name = "default" then custom_config else config in
423
+
Eio_main.run @@ fun env ->
424
+
let trace = run_stress_test ~env config in
425
+
let json = Printf.sprintf "[%s]" (Trace.trace_to_json trace) in
426
+
let oc = open_out output_file in
427
+
output_string oc json;
429
+
Printf.printf "Results written to %s\n" output_file;
430
+
Printf.printf "Test: %s - %d messages, %.2f msg/s, %.2fms avg latency, %d errors\n"
431
+
trace.test_name trace.total_messages trace.throughput trace.avg_latency trace.total_errors
434
+
Eio_main.run @@ fun env ->
435
+
let traces = run_all_presets ~env in
436
+
let json = "[" ^ String.concat ",\n" (List.map Trace.trace_to_json traces) ^ "]" in
437
+
let oc = open_out output_file in
438
+
output_string oc json;
440
+
Printf.printf "Results written to %s\n" output_file;
441
+
List.iter (fun t ->
442
+
Printf.printf " %s: %d messages, %.2f msg/s, %.2fms avg latency, %d errors\n"
443
+
t.Trace.test_name t.total_messages t.throughput t.avg_latency t.total_errors
447
+
Printf.printf "Running extended stress test: %d servers, %d clients/server, %d msgs/client\n"
448
+
extended_preset.num_servers extended_preset.num_clients extended_preset.messages_per_client;
449
+
Printf.printf "Total messages: %d\n%!"
450
+
(extended_preset.num_servers * extended_preset.num_clients * extended_preset.messages_per_client);
451
+
Eio_main.run @@ fun env ->
452
+
let trace = run_stress_test ~env extended_preset in
453
+
let json = Printf.sprintf "[%s]" (Trace.trace_to_json trace) in
454
+
let oc = open_out output_file in
455
+
output_string oc json;
457
+
Printf.printf "Results written to %s\n" output_file;
458
+
Printf.printf "Test: %s - %d messages, %.2f msg/s, %.2fms avg latency, %d errors\n"
459
+
trace.test_name trace.total_messages trace.throughput trace.avg_latency trace.total_errors