Model Context Protocol in OCaml

Add multimodal example with image, audio and text content

This commit adds a new multimodal_sdk.ml example that demonstrates:
- Generating and returning image content (PPM format)
- Generating and returning audio content (WAV format)
- Combining multiple content types in a single tool response
- Using the new error handling and tool result infrastructure
- Implementing resource templates with multimodal responses

The example includes:
- A Base64 encoder for binary data
- A PPM (simple RGB format) image generator with checkerboard pattern
- A WAV audio generator for sine wave tones
- Multiple tools using different content types

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Changed files
+266 -1
bin
+6 -1
bin/dune
···
(executable
(name capitalize_sdk)
(modules capitalize_sdk)
-
(libraries mcp mcp_server yojson eio_main eio))
+
(libraries mcp mcp_server yojson eio_main eio))
+
+
(executable
+
(name multimodal_sdk)
+
(modules multimodal_sdk)
+
(libraries mcp mcp_sdk mcp_server yojson eio_main eio))
+260
bin/multimodal_sdk.ml
···
+
open Mcp
+
open Mcp_sdk
+
+
(** [get_string_param json name] returns the string bound to [name] in the
    JSON object [json].
    @raise Failure if [json] is not an [`Assoc], or if [name] is absent or
    bound to something other than a [`String]. *)
let get_string_param json name =
  let fields =
    match json with
    | `Assoc members -> members
    | _ -> failwith "Expected JSON object"
  in
  match List.assoc_opt name fields with
  | Some (`String text) -> text
  | _ -> failwith (Printf.sprintf "Missing or invalid parameter: %s" name)
+
+
(** [get_int_param json name] returns the integer bound to [name] in the
    JSON object [json].
    @raise Failure if [json] is not an [`Assoc], or if [name] is absent or
    bound to something other than an [`Int]. *)
let get_int_param json name =
  let lookup fields =
    match List.assoc_opt name fields with
    | Some (`Int number) -> number
    | _ -> failwith (Printf.sprintf "Missing or invalid parameter: %s" name)
  in
  match json with
  | `Assoc fields -> lookup fields
  | _ -> failwith "Expected JSON object"
+
+
(* Small Base64 encoder: standard alphabet, output padded with '='. *)
module Base64 = struct
  (** [encode_char idx] is the alphabet character for the 6-bit value [idx]
      (expected to be in 0..63). *)
  let encode_char idx =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".[idx]

  (** [encode s] is the Base64 text for the bytes of [s]. Every group of up
      to three input bytes becomes four output characters; a short final
      group is padded with ['=']. *)
  let encode s =
    let len = String.length s in
    let out = Buffer.create (((len + 2) / 3) * 4) in
    (* Bytes past the end of the input count as zero when packing a group. *)
    let byte_at k = if k < len then Char.code s.[k] else 0 in
    let sextet group shift = encode_char ((group lsr shift) land 63) in
    let pos = ref 0 in
    while !pos < len do
      let i = !pos in
      let group =
        (byte_at i lsl 16) lor (byte_at (i + 1) lsl 8) lor byte_at (i + 2)
      in
      Buffer.add_char out (sextet group 18);
      Buffer.add_char out (sextet group 12);
      Buffer.add_char out (if i + 1 < len then sextet group 6 else '=');
      Buffer.add_char out (if i + 2 < len then sextet group 0 else '=');
      pos := i + 3
    done;
    Buffer.contents out
end
+
+
(** [generate_random_image width height] builds a binary PPM ("P6") image of
    [width] x [height] pixels and returns it Base64-encoded.

    The picture is a checkerboard whose square size is drawn once per call
    (10 to 29 pixels). Pixels on "light" squares get each channel in
    150..249, pixels on "dark" squares get each channel in 0..99, drawn
    independently per pixel from the global [Random] state. *)
let generate_random_image width height =
  let header = Printf.sprintf "P6\n%d %d\n255\n" width height in
  let pixels = Bytes.create (width * height * 3) in
  let cell = Random.int 20 + 10 in
  (* One colour channel: [floor] plus a random offset in 0..99. *)
  let channel floor = Char.chr (floor + Random.int 100) in
  for row = 0 to height - 1 do
    for col = 0 to width - 1 do
      let floor = if ((col / cell) + (row / cell)) mod 2 = 0 then 150 else 0 in
      let pos = ((row * width) + col) * 3 in
      (* Bind R, G, B one after another so the random draws happen in a
         fixed order regardless of argument evaluation order. *)
      let red = channel floor in
      let green = channel floor in
      let blue = channel floor in
      Bytes.set pixels pos red;
      Bytes.set pixels (pos + 1) green;
      Bytes.set pixels (pos + 2) blue
    done
  done;
  Base64.encode (header ^ Bytes.to_string pixels)
+
+
(** [generate_sine_wave_audio frequency duration] synthesises a mono,
    16-bit PCM, 8000 Hz WAV file containing a sine tone and returns it
    Base64-encoded.

    @param frequency tone frequency in Hz (float)
    @param duration length of the tone in whole seconds
    @raise Failure if [duration] is negative *)
let generate_sine_wave_audio frequency duration =
  if duration < 0 then failwith "Audio duration must not be negative";
  let sample_rate = 8000 in
  let num_channels = 1 in
  let bits_per_sample = 16 in
  (* Bytes per sample frame across all channels. *)
  let block_align = num_channels * (bits_per_sample / 8) in
  let num_samples = sample_rate * duration in
  let data_size = num_samples * block_align in
  (* Canonical PCM header: RIFF descriptor (12) + fmt chunk (24) + data
     chunk header (8). *)
  let header_size = 44 in
  let wav = Bytes.create (header_size + data_size) in
  let put_tag pos tag = Bytes.blit_string tag 0 wav pos 4 in
  (* All multi-byte WAV fields are little-endian. *)
  let put_u32 pos v = Bytes.set_int32_le wav pos (Int32.of_int v) in
  let put_u16 pos v = Bytes.set_uint16_le wav pos v in
  put_tag 0 "RIFF";
  put_u32 4 (36 + data_size);            (* file size minus the first 8 bytes *)
  put_tag 8 "WAVE";
  put_tag 12 "fmt ";
  put_u32 16 16;                         (* fmt chunk size for PCM *)
  put_u16 20 1;                          (* audio format 1 = PCM *)
  put_u16 22 num_channels;
  put_u32 24 sample_rate;
  put_u32 28 (sample_rate * block_align); (* byte rate *)
  put_u16 32 block_align;
  put_u16 34 bits_per_sample;
  put_tag 36 "data";
  put_u32 40 data_size;

  (* Half of full scale, leaving headroom below the 16-bit limit. *)
  let amplitude = 16384.0 in
  for i = 0 to num_samples - 1 do
    let t = float_of_int i /. float_of_int sample_rate in
    let sample =
      int_of_float (amplitude *. sin (2.0 *. Float.pi *. frequency *. t))
    in
    (* Stores the low 16 bits, i.e. the two's-complement signed sample. *)
    Bytes.set_int16_le wav (header_size + (i * block_align)) sample
  done;

  Base64.encode (Bytes.to_string wav)
+
+
(* The example's server value: created with its name, version and protocol
   version, then passed through [configure_server] with the tools,
   resources and prompts flags all set to true. *)
let server =
  let base =
    create_server
      ~name:"OCaml MCP Multimodal Example"
      ~version:"0.1.0"
      ~protocol_version:"2025-03-26" ()
  in
  configure_server base ~with_tools:true ~with_resources:true ~with_prompts:true ()
+
+
(* Register the "multimodal_demo" tool, which answers with a text message,
   a generated image and a generated audio tone in a single result.

   "message" is required. "width", "height", "frequency" and "duration" fall
   back to 128, 128, 440 and 1 when they are missing or not integers. The
   resulting values are range-checked with the same limits as the
   single-purpose tools in this file before anything is generated, because
   they size the buffers the generators allocate. *)
let _ = add_tool server
  ~name:"multimodal_demo"
  ~description:"Demonstrates multimodal content with text, image, and audio"
  ~schema_properties:[
    ("width", "integer", "Width of the generated image (pixels)");
    ("height", "integer", "Height of the generated image (pixels)");
    ("frequency", "integer", "Frequency of the generated audio tone (Hz)");
    ("duration", "integer", "Duration of the generated audio (seconds)");
    ("message", "string", "Text message to include")
  ]
  ~schema_required:["message"]
  (fun args ->
    (* Optional integer argument: only the lookup failure is turned into the
       default, other exceptions are left to propagate. *)
    let int_or_default name default =
      try get_int_param args name with Failure _ -> default
    in
    try
      let message = get_string_param args "message" in
      let width = int_or_default "width" 128 in
      let height = int_or_default "height" 128 in
      let frequency = int_or_default "frequency" 440 in
      let duration = int_or_default "duration" 1 in

      if width < 1 || width > 1024 || height < 1 || height > 1024 then
        create_tool_result
          [TextContent "Error: Dimensions must be between 1 and 1024 pixels"]
          ~is_error:true
      else if frequency < 20 || frequency > 20000 then
        create_tool_result
          [TextContent "Error: Frequency must be between 20Hz and 20,000Hz"]
          ~is_error:true
      else if duration < 1 || duration > 10 then
        create_tool_result
          [TextContent "Error: Duration must be between 1 and 10 seconds"]
          ~is_error:true
      else
        (* Generate image and audio data *)
        let image_data = generate_random_image width height in
        let audio_data = generate_sine_wave_audio (float_of_int frequency) duration in

        (* Create a multimodal tool result *)
        create_rich_tool_result
          ~text:(Some message)
          ~image:(Some (image_data, "image/ppm"))
          ~audio:(Some (audio_data, "audio/wav"))
          ~is_error:false
          ()
    with
    | Failure msg ->
      Log.error (Printf.sprintf "Error in multimodal tool: %s" msg);
      create_tool_result [TextContent (Printf.sprintf "Error: %s" msg)] ~is_error:true
  )
+
+
(* Register the "generate_image" tool. It requires integer "width" and
   "height" arguments, rejects values outside 1..1024 with an error result,
   and otherwise returns one image content item holding the Base64 PPM
   produced by [generate_random_image]. *)
let _ = add_tool server
  ~name:"generate_image"
  ~description:"Generates a random image with specified dimensions"
  ~schema_properties:[
    ("width", "integer", "Width of the generated image (pixels)");
    ("height", "integer", "Height of the generated image (pixels)")
  ]
  ~schema_required:["width"; "height"]
  (fun args ->
    (* Error result carrying a single text item. *)
    let reject text = create_tool_result [TextContent text] ~is_error:true in
    let within_limits n = n >= 1 && n <= 1024 in
    try
      let width = get_int_param args "width" in
      let height = get_int_param args "height" in
      if within_limits width && within_limits height then
        create_tool_result
          [ImageContent { data = generate_random_image width height;
                          mime_type = "image/ppm" }]
          ~is_error:false
      else
        reject "Error: Dimensions must be between 1 and 1024 pixels"
    with
    | Failure msg ->
      Log.error (Printf.sprintf "Error in generate_image tool: %s" msg);
      reject (Printf.sprintf "Error: %s" msg)
  )
+
+
(* Register the "generate_audio" tool. It requires integer "frequency" and
   "duration" arguments, checks the frequency (20..20000) and then the
   duration (1..10), and on success returns one audio content item holding
   the Base64 WAV produced by [generate_sine_wave_audio]. *)
let _ = add_tool server
  ~name:"generate_audio"
  ~description:"Generates an audio tone with specified frequency and duration"
  ~schema_properties:[
    ("frequency", "integer", "Frequency of the tone in Hz (20-20000)");
    ("duration", "integer", "Duration of the tone in seconds (1-10)")
  ]
  ~schema_required:["frequency"; "duration"]
  (fun args ->
    (* Error result carrying a single text item. *)
    let reject text = create_tool_result [TextContent text] ~is_error:true in
    try
      let frequency = get_int_param args "frequency" in
      let duration = get_int_param args "duration" in
      let frequency_ok = frequency >= 20 && frequency <= 20000 in
      let duration_ok = duration >= 1 && duration <= 10 in
      (* The frequency problem is reported first when both are out of range. *)
      if not frequency_ok then
        reject "Error: Frequency must be between 20Hz and 20,000Hz"
      else if not duration_ok then
        reject "Error: Duration must be between 1 and 10 seconds"
      else
        create_tool_result
          [AudioContent { data = generate_sine_wave_audio (float_of_int frequency) duration;
                          mime_type = "audio/wav" }]
          ~is_error:false
    with
    | Failure msg ->
      Log.error (Printf.sprintf "Error in generate_audio tool: %s" msg);
      reject (Printf.sprintf "Error: %s" msg)
  )
+
+
(* [json_escape_string s] escapes [s] so it can be placed between the double
   quotes of a JSON string literal: quote, backslash and control characters
   below U+0020 are escaped, every other byte is copied unchanged. *)
let json_escape_string s =
  let buf = Buffer.create (String.length s + 16) in
  String.iter
    (function
      | '"' -> Buffer.add_string buf "\\\""
      | '\\' -> Buffer.add_string buf "\\\\"
      | '\n' -> Buffer.add_string buf "\\n"
      | '\r' -> Buffer.add_string buf "\\r"
      | '\t' -> Buffer.add_string buf "\\t"
      | '\b' -> Buffer.add_string buf "\\b"
      | '\012' -> Buffer.add_string buf "\\f"
      | c when Char.code c < 0x20 ->
        Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
      | c -> Buffer.add_char buf c)
    s;
  Buffer.contents buf

(* Register the "multimodal://{name}" resource. For a single template
   parameter it returns a JSON document with a greeting, a 128x128 generated
   image and a one-second 440 Hz tone (both Base64). Any other parameter
   list yields a JSON error object.

   The greeting embeds the caller-supplied name, so it is JSON-escaped before
   being spliced into the template; the Base64 payloads only contain
   characters that need no escaping. *)
let _ = add_resource server
  ~uri_template:"multimodal://{name}"
  ~description:"Get a multimodal greeting with text, image and audio"
  ~mime_type:"application/json"
  (fun params ->
    match params with
    | [name] ->
      let greeting = Printf.sprintf "Hello, %s! Welcome to the multimodal MCP example." name in
      let image_data = generate_random_image 128 128 in
      let audio_data = generate_sine_wave_audio 440.0 1 in

      Printf.sprintf {|
{
  "greeting": "%s",
  "image": {
    "data": "%s",
    "mimeType": "image/ppm"
  },
  "audio": {
    "data": "%s",
    "mimeType": "audio/wav"
  }
}
|} (json_escape_string greeting) image_data audio_data
    | _ -> {|{"error": "Invalid parameters"}|}
  )
+
+
(* Program entry point: seed the global random generator (used by the image
   generator), then hand the configured server to [Mcp_server.run_server]
   inside the Eio main loop. *)
let () =
  Random.self_init ();
  Eio_main.run (fun env -> Mcp_server.run_server env server)