open Mcp open Mcp_sdk open Mcp_server (* Multimodal example MCP server *) (* Base64 encoding helper *) module Base64 = struct let encode_char n = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".[n] let encode_block i bytes = let buffer = Buffer.create 4 in let b1 = Char.code (String.get bytes (i * 3)) in let b2 = if i * 3 + 1 < String.length bytes then Char.code (String.get bytes (i * 3 + 1)) else 0 in let b3 = if i * 3 + 2 < String.length bytes then Char.code (String.get bytes (i * 3 + 2)) else 0 in let n = (b1 lsl 16) lor (b2 lsl 8) lor b3 in Buffer.add_char buffer (encode_char ((n lsr 18) land 63)); Buffer.add_char buffer (encode_char ((n lsr 12) land 63)); if i * 3 + 1 < String.length bytes then Buffer.add_char buffer (encode_char ((n lsr 6) land 63)) else Buffer.add_char buffer '='; if i * 3 + 2 < String.length bytes then Buffer.add_char buffer (encode_char (n land 63)) else Buffer.add_char buffer '='; Buffer.contents buffer let encode data = let buffer = Buffer.create (4 * (String.length data + 2) / 3) in for i = 0 to (String.length data - 1) / 3 do Buffer.add_string buffer (encode_block i data) done; Buffer.contents buffer end (* Audio generator *) module AudioGenerator = struct (* Generate a simple sine wave *) let generate_sine_wave ~frequency ~duration ~sample_rate ~amplitude = (* WAV parameters *) let num_channels = 1 in (* Mono *) let bits_per_sample = 16 in let byte_rate = sample_rate * num_channels * bits_per_sample / 8 in let block_align = num_channels * bits_per_sample / 8 in let num_samples = int_of_float (float_of_int sample_rate *. duration) in let data_size = num_samples * block_align in (* Create buffer for the WAV data *) let buffer = Buffer.create (44 + data_size) in (* Write WAV header *) (* "RIFF" chunk *) Buffer.add_string buffer "RIFF"; let file_size = 36 + data_size in Buffer.add_char buffer (Char.chr (file_size land 0xff)); Buffer.add_char buffer (Char.chr ((file_size lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr ((file_size lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((file_size lsr 24) land 0xff)); Buffer.add_string buffer "WAVE"; (* "fmt " sub-chunk *) Buffer.add_string buffer "fmt "; Buffer.add_char buffer (Char.chr 16); (* Sub-chunk size (16 for PCM) *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 1); (* Audio format (1 for PCM) *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr num_channels); (* Number of channels *) Buffer.add_char buffer (Char.chr 0); (* Sample rate *) Buffer.add_char buffer (Char.chr (sample_rate land 0xff)); Buffer.add_char buffer (Char.chr ((sample_rate lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr ((sample_rate lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((sample_rate lsr 24) land 0xff)); (* Byte rate *) Buffer.add_char buffer (Char.chr (byte_rate land 0xff)); Buffer.add_char buffer (Char.chr ((byte_rate lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr ((byte_rate lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((byte_rate lsr 24) land 0xff)); (* Block align *) Buffer.add_char buffer (Char.chr block_align); Buffer.add_char buffer (Char.chr 0); (* Bits per sample *) Buffer.add_char buffer (Char.chr bits_per_sample); Buffer.add_char buffer (Char.chr 0); (* "data" sub-chunk *) Buffer.add_string buffer "data"; Buffer.add_char buffer (Char.chr (data_size land 0xff)); Buffer.add_char buffer (Char.chr ((data_size lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr ((data_size lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((data_size lsr 24) land 0xff)); (* Generate sine wave data *) let max_amplitude = float_of_int (1 lsl (bits_per_sample - 1)) -. 1.0 in for i = 0 to num_samples - 1 do let t = float_of_int i /. float_of_int sample_rate in let value = int_of_float (amplitude *. max_amplitude *. sin (2.0 *. Float.pi *. frequency *. t)) in (* Write 16-bit sample (little-endian) *) Buffer.add_char buffer (Char.chr (value land 0xff)); Buffer.add_char buffer (Char.chr ((value lsr 8) land 0xff)); done; Buffer.contents buffer end (* Image generator *) module ImageGenerator = struct (* Simple PNG generation *) let generate_simple_image width height color_str = (* Parse color - expected format: #RRGGBB or #RRGGBBAA *) let r, g, b, a = try if String.length color_str >= 7 && color_str.[0] = '#' then let r = int_of_string ("0x" ^ String.sub color_str 1 2) in let g = int_of_string ("0x" ^ String.sub color_str 3 2) in let b = int_of_string ("0x" ^ String.sub color_str 5 2) in let a = if String.length color_str >= 9 then int_of_string ("0x" ^ String.sub color_str 7 2) else 255 in (r, g, b, a) else (255, 0, 0, 255) (* Default to red if invalid *) with _ -> (255, 0, 0, 255) (* Default to red on parsing error *) in (* Create a very simple 1x1 PNG with the specified color *) (* PNG signature *) let signature = [137; 80; 78; 71; 13; 10; 26; 10] in (* Create buffer for the PNG data *) let buffer = Buffer.create 100 in (* PNG signature *) List.iter (fun b -> Buffer.add_char buffer (Char.chr b)) signature; (* IHDR chunk *) Buffer.add_char buffer (Char.chr 0); (* length - 13 bytes *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 13); Buffer.add_string buffer "IHDR"; (* Width *) Buffer.add_char buffer (Char.chr ((width lsr 24) land 0xff)); Buffer.add_char buffer (Char.chr ((width lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((width lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr (width land 0xff)); (* Height *) Buffer.add_char buffer (Char.chr ((height lsr 24) land 0xff)); Buffer.add_char buffer (Char.chr ((height lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((height lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr (height land 0xff)); Buffer.add_char buffer (Char.chr 8); (* Bit depth - 8 bits per channel *) Buffer.add_char buffer (Char.chr 6); (* Color type - RGBA *) Buffer.add_char buffer (Char.chr 0); (* Compression method - deflate *) Buffer.add_char buffer (Char.chr 0); (* Filter method - adaptive filtering *) Buffer.add_char buffer (Char.chr 0); (* Interlace method - no interlace *) (* IHDR CRC - precomputed for simplicity *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); (* IDAT chunk - simplified for example *) let pixels_per_row = width * 4 in let data_size = (1 + pixels_per_row) * height in Buffer.add_char buffer (Char.chr ((data_size lsr 24) land 0xff)); Buffer.add_char buffer (Char.chr ((data_size lsr 16) land 0xff)); Buffer.add_char buffer (Char.chr ((data_size lsr 8) land 0xff)); Buffer.add_char buffer (Char.chr (data_size land 0xff)); Buffer.add_string buffer "IDAT"; (* Simple zlib header *) Buffer.add_char buffer (Char.chr 0x78); Buffer.add_char buffer (Char.chr 0x01); (* Raw image data *) for _ = 0 to height - 1 do Buffer.add_char buffer (Char.chr 0); (* Filter type 0 - None *) for _ = 0 to width - 1 do Buffer.add_char buffer (Char.chr r); Buffer.add_char buffer (Char.chr g); Buffer.add_char buffer (Char.chr b); Buffer.add_char buffer (Char.chr a); done done; (* Dummy Adler32 checksum *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); (* IDAT CRC - precomputed for simplicity *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); (* IEND chunk *) Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_char buffer (Char.chr 0); Buffer.add_string buffer "IEND"; (* IEND CRC - precomputed value *) Buffer.add_char buffer (Char.chr 0xAE); Buffer.add_char buffer (Char.chr 0x42); Buffer.add_char buffer (Char.chr 0x60); Buffer.add_char buffer (Char.chr 0x82); Buffer.contents buffer end (* Helper for extracting values from JSON *) let get_param_int json name default = match json with | `Assoc fields -> begin match List.assoc_opt name fields with | Some (`Int i) -> begin i end | Some (`Float f) -> begin int_of_float f end | _ -> begin default end end | _ -> begin default end let get_param_float json name default = match json with | `Assoc fields -> begin match List.assoc_opt name fields with | Some (`Int i) -> begin float_of_int i end | Some (`Float f) -> begin f end | _ -> begin default end end | _ -> begin default end let get_param_string json name default = match json with | `Assoc fields -> begin match List.assoc_opt name fields with | Some (`String s) -> begin s end | _ -> begin default end end | _ -> begin default end (* Create a server *) let server = create_server ~name:"OCaml MCP Multimodal Example" ~version:"0.1.0" ~protocol_version:"2024-11-05" () (* Define startup and shutdown hooks *) let startup () = Printf.fprintf stderr "MultimodalServer is starting up!\n"; flush stderr; Log.info "MultimodalServer is starting up!" let shutdown () = Printf.fprintf stderr "MultimodalServer is shutting down. Goodbye!\n"; flush stderr; Log.info "MultimodalServer is shutting down. Goodbye!" (* Register the hooks *) let () = set_startup_hook server startup; set_shutdown_hook server shutdown (* Define and register a multimodal tool *) let _ = add_tool server ~name:"generate_multimodal_response" ~description:"Generates a response with text, image and audio content" ~schema_properties:[ ("message", "string", "The text message to include"); ("color", "string", "Color for the image (hex format #RRGGBB)"); ("frequency", "integer", "Frequency for the audio tone in Hz"); ] ~schema_required:["message"] (fun args -> try let message = get_param_string args "message" "Hello, multimodal world!" in let color = get_param_string args "color" "#FF0000" in let frequency = get_param_int args "frequency" 440 in (* Generate image *) let image_data = ImageGenerator.generate_simple_image 100 100 color in let image_base64 = Base64.encode image_data in (* Generate audio *) let audio_data = AudioGenerator.generate_sine_wave ~frequency:(float_of_int frequency) ~duration:1.0 ~sample_rate:8000 ~amplitude:0.8 in let audio_base64 = Base64.encode audio_data in (* Create a response with text, image and audio content *) CallToolResult.yojson_of_t CallToolResult.{ content = [ Text TextContent.{ text = message; annotations = None }; Image ImageContent.{ data = image_base64; mime_type = "image/png"; annotations = None }; Audio AudioContent.{ data = audio_base64; mime_type = "audio/wav"; annotations = None } ]; is_error = false; meta = None } with | Failure msg -> Log.error (Printf.sprintf "Error in multimodal tool: %s" msg); CallToolResult.yojson_of_t CallToolResult.{ content = [ Text TextContent.{ text = Printf.sprintf "Error: %s" msg; annotations = None } ]; is_error = true; meta = None } ) (* Define and register a multimodal prompt *) let _ = add_prompt server ~name:"multimodal-prompt" ~description:"A prompt that includes text, image, and audio" ~arguments:[ ("message", Some "Text message to include", true); ("color", Some "Color for the image (hex format #RRGGBB)", false); ("frequency", Some "Frequency for the audio tone in Hz", false); ] (fun args -> (* Parse parameters with defaults *) let message = try List.assoc "message" args with Not_found -> "Hello, multimodal world!" in let color = try List.assoc "color" args with Not_found -> "#0000FF" in let frequency = try int_of_string (List.assoc "frequency" args) with _ -> 440 in (* Generate image *) let image_data = ImageGenerator.generate_simple_image 100 100 color in let image_base64 = Base64.encode image_data in (* Generate audio *) let audio_data = AudioGenerator.generate_sine_wave ~frequency:(float_of_int frequency) ~duration:1.0 ~sample_rate:8000 ~amplitude:0.8 in let audio_base64 = Base64.encode audio_data in (* Create a multimodal prompt *) [ Prompt.{ role = `User; content = make_text_content "Here's a multimodal message with text, image, and audio:" }; Prompt.{ role = `User; content = make_text_content message }; Prompt.{ role = `User; content = make_image_content image_base64 "image/png" }; Prompt.{ role = `User; content = make_audio_content audio_base64 "audio/wav" }; Prompt.{ role = `Assistant; content = make_text_content "I've received your multimodal message with text, image, and audio." } ] ) (* Also register a resource prompt example *) let _ = add_prompt server ~name:"resource-prompt" ~description:"A prompt that includes embedded resources" ~arguments:[ ("resource_id", Some "ID of the resource to include", true); ] (fun args -> (* Sample resource texts *) let resources = [ ("doc1", "This is the content of document 1."); ("doc2", "Document 2 contains important information about OCaml."); ("doc3", "Document 3 explains the MCP protocol in detail."); ] in (* Get the requested resource *) let resource_id = try List.assoc "resource_id" args with Not_found -> "doc1" in (* Find the resource content *) let resource_content = try List.assoc resource_id resources with Not_found -> Printf.sprintf "Resource '%s' not found" resource_id in (* Create a prompt with embedded resource *) [ Prompt.{ role = `User; content = make_text_content (Printf.sprintf "Here's the content of resource %s:" resource_id) }; Prompt.{ role = `User; content = make_text_resource_content (Printf.sprintf "resource://%s" resource_id) resource_content ~mime_type:"text/plain" () }; Prompt.{ role = `User; content = make_text_content "Please analyze this content." }; Prompt.{ role = `Assistant; content = make_text_content "I'll analyze the resource content for you." } ] ) (* Main function *) let () = (* Parse command line arguments *) let transport_type = ref Stdio in let args = [ ("--http", Arg.Unit (fun () -> transport_type := Http), "Start server with HTTP transport (default is stdio)"); ] in let usage_msg = "Usage: multimodal_example [--http]" in Arg.parse args (fun _ -> ()) usage_msg; (* Configure the server with appropriate capabilities *) let server = configure_server server ~with_tools:true ~with_resources:false ~with_prompts:true () in (* Create and start MCP server with the selected transport *) let mcp_server = create ~server ~transport:!transport_type () in start mcp_server;