open Mcp
open Mcp_sdk
open Mcp_server

(* WAV file format helper module *)
module Wav = struct
  (* [add_le buf ~width n] appends the [width] low-order bytes of [n] to
     [buf], least-significant byte first. Higher bits are discarded, so a
     negative [n] is emitted as its two's-complement bit pattern. *)
  let add_le buf ~width n =
    for k = 0 to width - 1 do
      Buffer.add_char buf (Char.chr ((n lsr (8 * k)) land 0xff))
    done

  (* 16-bit and 32-bit little-endian writers used by the WAV header and the
     sample data. *)
  let add_u16_le buf n = add_le buf ~width:2 n
  let add_u32_le buf n = add_le buf ~width:4 n

  (* [generate_sine_wave ~frequency ~duration ~sample_rate ~amplitude]
     returns the bytes of a mono, 16-bit PCM WAV file (44-byte header followed
     by the samples) containing a sine tone.

     - [frequency]: tone frequency, in cycles per second of [sample_rate].
     - [duration]: length in seconds. A negative or NaN duration yields a
       valid WAV file with zero samples instead of negative size fields.
     - [sample_rate]: samples per second, written verbatim in the header.
     - [amplitude]: fraction of full scale (32767). Samples that would fall
       outside the signed 16-bit range are clipped rather than wrapped. *)
  let generate_sine_wave ~frequency ~duration ~sample_rate ~amplitude =
    (* WAV parameters *)
    let num_channels = 1 in
    (* Mono *)
    let bits_per_sample = 16 in
    let byte_rate = sample_rate * num_channels * bits_per_sample / 8 in
    let block_align = num_channels * bits_per_sample / 8 in
    (* [requested > 0.0] is false for NaN as well as for negative values, so
       both end up with an empty data chunk. *)
    let requested = float_of_int sample_rate *. duration in
    let num_samples = if requested > 0.0 then int_of_float requested else 0 in
    let data_size = num_samples * block_align in

    let buffer = Buffer.create (44 + data_size) in

    (* "RIFF" chunk: the size field counts everything after itself, i.e. the
       remaining 36 header bytes plus the sample data. *)
    Buffer.add_string buffer "RIFF";
    add_u32_le buffer (36 + data_size);
    Buffer.add_string buffer "WAVE";

    (* "fmt " sub-chunk *)
    Buffer.add_string buffer "fmt ";
    add_u32_le buffer 16; (* Sub-chunk size (16 for PCM) *)
    add_u16_le buffer 1; (* Audio format (1 for PCM) *)
    add_u16_le buffer num_channels;
    add_u32_le buffer sample_rate;
    add_u32_le buffer byte_rate;
    add_u16_le buffer block_align;
    add_u16_le buffer bits_per_sample;

    (* "data" sub-chunk *)
    Buffer.add_string buffer "data";
    add_u32_le buffer data_size;

    (* Sample generation *)
    let max_sample = (1 lsl (bits_per_sample - 1)) - 1 in
    let min_sample = -(1 lsl (bits_per_sample - 1)) in
    let max_amplitude = float_of_int (1 lsl (bits_per_sample - 1)) -. 1.0 in
    (* Convert a floating-point sample to an integer, saturating at the
       16-bit limits; NaN (e.g. from a NaN frequency) becomes silence because
       [int_of_float nan] is unspecified. In-range values are truncated
       toward zero. *)
    let to_sample v =
      match classify_float v with
      | FP_nan -> 0
      | FP_infinite | FP_normal | FP_subnormal | FP_zero ->
          if v >= float_of_int max_sample then max_sample
          else if v <= float_of_int min_sample then min_sample
          else int_of_float v
    in
    let scale = amplitude *. max_amplitude in
    let angular = 2.0 *. Float.pi *. frequency in
    for i = 0 to num_samples - 1 do
      let t = float_of_int i /. float_of_int sample_rate in
      (* 16-bit sample, little-endian *)
      add_u16_le buffer (to_sample (scale *. sin (angular *. t)))
    done;

    Buffer.contents buffer

  (* [base64_encode data] returns [data] encoded with the standard base64
     alphabet ('+' and '/'), padded with '=' so the result length is a
     multiple of four. The empty string encodes to the empty string. *)
  let base64_encode data =
    let len = String.length data in
    let out = Buffer.create (4 * ((len + 2) / 3)) in
    let alphabet =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    in
    (* Bytes past the end of the input read as zero; the padding logic below
       decides whether the corresponding output character is real or '='. *)
    let byte_at pos = if pos < len then Char.code (String.get data pos) else 0 in
    let sextet n shift = String.get alphabet ((n lsr shift) land 63) in
    let rec encode_from pos =
      if pos < len then begin
        let n =
          ((byte_at pos) lsl 16)
          lor ((byte_at (pos + 1)) lsl 8)
          lor (byte_at (pos + 2))
        in
        Buffer.add_char out (sextet n 18);
        Buffer.add_char out (sextet n 12);
        Buffer.add_char out (if pos + 1 < len then sextet n 6 else '=');
        Buffer.add_char out (if pos + 2 < len then sextet n 0 else '=');
        encode_from (pos + 3)
      end
    in
    encode_from 0;
    Buffer.contents out
end

(* Helper for extracting string value from JSON.
   [get_string_param json name] returns the string bound to [name] in the
   JSON object [json].
   Raises [Failure] when [json] is not an object, or when [name] is absent
   or bound to a non-string value. *)
let get_string_param json name =
  match json with
  | `Assoc fields -> (
      match List.assoc_opt name fields with
      | Some (`String value) -> value
      | _ -> failwith (Printf.sprintf "Missing or invalid parameter: %s" name))
  | _ -> failwith "Expected JSON object"

(* Create a server *)
let server =
  create_server
    ~name:"OCaml MCP Audio Example"
    ~version:"0.1.0"
    ~protocol_version:"2024-11-05" ()

(* Define startup and shutdown hooks *)

(* Announces startup on stderr and through [Log.info]. *)
let startup () =
  (* Use stderr for direct printing to avoid interfering with JSON-RPC protocol *)
  Printf.fprintf stderr "AudioExampleServer is starting up!\n";
  flush stderr;
  Log.info "AudioExampleServer is starting up!"

(* Announces shutdown on stderr and through [Log.info]. *)
let shutdown () =
  Printf.fprintf stderr "AudioExampleServer is shutting down. Goodbye!\n";
  flush stderr;
  Log.info "AudioExampleServer is shutting down. Goodbye!"
(* Register the hooks *)
let () =
  set_startup_hook server startup;
  set_shutdown_hook server shutdown

(* Helper to create audio content: wraps base64 [data] and its [mime_type]
   in an [Audio] content value with no annotations. *)
let make_audio_content data mime_type =
  let audio_content = AudioContent.{ data; mime_type; annotations = None } in
  Audio audio_content

(* Upper bound, in seconds, on the tone length a client may request. The
   generated audio is held fully in memory (raw and base64-encoded), so an
   unbounded client-supplied duration could exhaust memory. *)
let max_tone_duration = 60.0

(* [finite_or ~default x] is [x] unless [x] is NaN or infinite, in which case
   it is [default]. *)
let finite_or ~default x =
  match classify_float x with
  | FP_nan | FP_infinite -> default
  | FP_normal | FP_subnormal | FP_zero -> x

(* [clamp ~lo ~hi x] restricts [x] to the closed interval [lo, hi]. *)
let clamp ~lo ~hi x = if x < lo then lo else if x > hi then hi else x

(* [get_number_param json name ~default] reads an optional numeric field from
   a JSON object, accepting both integer and float encodings. Returns
   [default] when [json] is not an object, the field is absent, the field is
   not a number, or the number is not finite. *)
let get_number_param json name ~default =
  match json with
  | `Assoc fields -> (
      match List.assoc_opt name fields with
      | Some (`Int i) -> float_of_int i
      | Some (`Float f) -> finite_or ~default f
      | _ -> default)
  | _ -> default

(* [get_float_arg args name ~default] parses the string bound to [name] in the
   association list [args] as a float. Returns [default] when the argument is
   absent, is not a valid float literal, or is not finite. *)
let get_float_arg args name ~default =
  match List.assoc_opt name args with
  | None -> default
  | Some raw -> (
      match float_of_string_opt raw with
      | Some v -> finite_or ~default v
      | None -> default)

(* Define and register an audio tool.
   The handler requires a string "text" argument and accepts optional numeric
   "frequency" (default 440.0), "duration" (default 2.0, limited to
   0..[max_tone_duration] seconds) and "amplitude" (default 0.8, limited to
   the documented 0.0-1.0 range). It answers with a text item followed by a
   base64-encoded WAV item; a [Failure] raised while reading the arguments is
   turned into an error result. *)
let _ =
  add_tool server
    ~name:"generate_audio_description"
    ~description:"Generates a description with an audio sample"
    ~schema_properties:[
      ("text", "string", "The text to describe with audio");
      ("frequency", "number", "The frequency in Hz for the tone (optional)");
      ("duration", "number", "The duration in seconds for the tone (optional)");
      ("amplitude", "number", "The amplitude (0.0-1.0) for the tone (optional)");
    ]
    ~schema_required:["text"]
    (fun args ->
      try
        let text = get_string_param args "text" in

        (* Parse parameters with defaults *)
        let frequency =
          get_number_param args "frequency" ~default:440.0 (* A440 *)
        in
        let duration =
          get_number_param args "duration" ~default:2.0
          |> clamp ~lo:0.0 ~hi:max_tone_duration
        in
        let amplitude =
          get_number_param args "amplitude" ~default:0.8
          |> clamp ~lo:0.0 ~hi:1.0
        in

        (* Generate WAV file for the tone *)
        let sample_rate = 44100 in
        (* CD quality *)
        let wav_data =
          Wav.generate_sine_wave ~frequency ~duration ~sample_rate ~amplitude
        in

        (* Encode WAV data as base64 *)
        let base64_audio = Wav.base64_encode wav_data in

        Log.info
          (Printf.sprintf "Generated %d Hz tone for %.1f seconds (%.1f KB)"
             (int_of_float frequency) duration
             (float_of_int (String.length wav_data) /. 1024.0));

        (* Create a response with both text and audio content *)
        CallToolResult.yojson_of_t CallToolResult.{
          content = [
            Text TextContent.{
              text =
                Printf.sprintf
                  "Description: %s (with %.1f Hz tone for %.1f seconds)"
                  text frequency duration;
              annotations = None
            };
            Audio AudioContent.{
              data = base64_audio;
              mime_type = "audio/wav";
              annotations = None
            }
          ];
          is_error = false;
          meta = None
        }
      with
      | Failure msg ->
        Log.error (Printf.sprintf "Error in audio tool: %s" msg);
        CallToolResult.yojson_of_t CallToolResult.{
          content = [
            Text TextContent.{
              text = Printf.sprintf "Error: %s" msg;
              annotations = None
            }
          ];
          is_error = true;
          meta = None
        }
    )

(* Define and register a prompt example with audio.
   The handler reads "description", "frequency" (default 440.0) and
   "duration" (default 3.0, limited to 0..[max_tone_duration] seconds) from
   the string arguments, generates a tone at 80% amplitude and returns a
   four-message conversation that embeds it. *)
let _ =
  add_prompt server
    ~name:"audio-description-prompt"
    ~description:"A prompt with audio and text content"
    ~arguments:[
      ("description", Some "Text description to accompany the audio", true);
      ("frequency", Some "Frequency in Hz for the audio tone", false);
      ("duration", Some "Duration in seconds for the audio tone", false);
    ]
    (fun args ->
      let description =
        match List.assoc_opt "description" args with
        | Some d -> d
        | None -> "No description provided"
      in

      (* Missing or unparsable values fall back to the defaults. *)
      let frequency = get_float_arg args "frequency" ~default:440.0 in
      let duration =
        get_float_arg args "duration" ~default:3.0
        |> clamp ~lo:0.0 ~hi:max_tone_duration
      in

      (* Generate WAV data *)
      let sample_rate = 44100 in
      let wav_data =
        Wav.generate_sine_wave ~frequency ~duration ~sample_rate ~amplitude:0.8
      in

      (* Encode WAV data as base64 *)
      let base64_audio = Wav.base64_encode wav_data in

      Log.info
        (Printf.sprintf
           "Generated %.1f Hz tone for prompt (%.1f seconds, %.1f KB)"
           frequency duration
           (float_of_int (String.length wav_data) /. 1024.0));

      [
        Prompt.{
          role = `User;
          content = make_text_content "Here's a sound sample with description:"
        };
        Prompt.{
          role = `User;
          content = make_audio_content base64_audio "audio/wav"
        };
        Prompt.{
          role = `User;
          content =
            make_text_content
              (Printf.sprintf "%s (%.1f Hz tone for %.1f seconds)"
                 description frequency duration)
        };
        Prompt.{
          role = `Assistant;
          content =
            make_text_content "I've received your audio file and description."
        }
      ]
    )

(* Main function: selects the transport from the command line ([--http]
   switches from the default stdio transport to HTTP), configures the server
   and starts it. *)
let () =
  (* Parse command line arguments *)
  let transport_type = ref Mcp_server.Stdio in
  let args = [
    ("--http",
     Arg.Unit (fun () -> transport_type := Mcp_server.Http),
     "Start server with HTTP transport (default is stdio)");
  ] in
  let usage_msg = "Usage: audio_example [--http]" in
  (* Anonymous (positional) arguments are accepted and ignored. *)
  Arg.parse args (fun _ -> ()) usage_msg;

  (* Use stderr for direct printing to avoid interfering with JSON-RPC protocol *)
  Printf.fprintf stderr "Starting AudioExampleServer...\n";
  flush stderr;
  Log.info "Starting AudioExampleServer...";

  (* Configure the server with appropriate capabilities *)
  ignore (configure_server server ());

  (* Create and start MCP server with the selected transport *)
  let mcp_server = Mcp_server.create ~server ~transport:!transport_type () in
  Mcp_server.start mcp_server