use anyhow::Result;
use std::collections::{HashMap, HashSet};

/// Rule-based text embedder built from two hand-written lookup tables.
///
/// Each known word maps to an eight-component feature vector whose axes are,
/// in order: brightness, warmth, saturation, intensity, calmness,
/// naturalness, sophistication and energy. Higher-level concepts ("ocean",
/// "cozy", ...) map to lists of descriptor words, some of which have feature
/// vectors of their own.
#[derive(Debug, Clone)]
pub struct EmbeddingGenerator {
    /// Word -> eight-component feature vector.
    word_vectors: HashMap<String, Vec<f32>>,
    /// Concept -> related descriptor words (not all of them have a vector).
    concept_mappings: HashMap<String, Vec<String>>,
}

impl EmbeddingGenerator {
    /// Number of components in every feature vector and every embedding.
    const DIMENSIONS: usize = 8;
    /// Weight applied when an input word is itself a dictionary word.
    const DIRECT_WEIGHT: f32 = 2.0;
    /// Weight applied to each descriptor reached through a concept mapping.
    const CONCEPT_WEIGHT: f32 = 1.5;
    /// Base weight for substring matches; scaled by the string similarity.
    const PARTIAL_WEIGHT: f32 = 0.8;
    /// Substring matches at or below this similarity are ignored.
    const PARTIAL_THRESHOLD: f32 = 0.6;
    /// Width of the hash-derived offset added to each component (+/- half of it).
    const JITTER_SCALE: f32 = 0.1;

    /// Builds a generator with the built-in concept and word-vector tables and
    /// prints a short progress report to stdout.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`; the `Result` wrapper is
    /// part of the existing interface.
    pub fn new() -> Result<Self> {
        println!("🔄 Initializing advanced semantic matching system...");

        let mut generator = Self {
            word_vectors: HashMap::new(),
            concept_mappings: HashMap::new(),
        };
        generator.build_concept_mappings();
        generator.build_word_vectors();

        println!(
            "✅ Loaded {} semantic concepts and {} word vectors",
            generator.concept_mappings.len(),
            generator.word_vectors.len()
        );

        Ok(generator)
    }

    /// Replaces `concept_mappings` with the built-in concept table.
    fn build_concept_mappings(&mut self) {
        const CONCEPTS: &[(&str, &[&str])] = &[
            // Emotional concepts
            ("happy", &["joy", "bright", "sunny", "cheerful", "golden", "vibrant", "warm", "light"]),
            ("sad", &["blue", "gray", "dark", "cold", "muted", "somber", "dim", "melancholy"]),
            ("angry", &["red", "hot", "fiery", "intense", "burning", "scarlet", "crimson", "rage"]),
            ("calm", &["blue", "peaceful", "serene", "cool", "tranquil", "soft", "gentle", "quiet"]),
            ("energetic", &["bright", "vivid", "electric", "neon", "bold", "dynamic", "vibrant", "intense"]),
            ("mysterious", &["dark", "deep", "purple", "black", "shadow", "midnight", "enigmatic", "unknown"]),
            ("romantic", &["pink", "rose", "soft", "warm", "tender", "blush", "coral", "intimate"]),
            ("peaceful", &["green", "blue", "soft", "calm", "nature", "harmony", "balance", "zen"]),
            // Nature concepts
            ("ocean", &["blue", "teal", "cyan", "aqua", "turquoise", "deep", "wave", "water"]),
            ("forest", &["green", "brown", "earth", "natural", "deep", "emerald", "leaf", "tree"]),
            ("sunset", &["orange", "red", "gold", "warm", "coral", "amber", "glow", "horizon"]),
            ("sunrise", &["yellow", "orange", "gold", "bright", "dawn", "light", "morning", "new"]),
            ("fire", &["red", "orange", "yellow", "hot", "bright", "flame", "burn", "heat"]),
            ("ice", &["white", "blue", "silver", "cold", "crystal", "frozen", "clear", "pale"]),
            ("sky", &["blue", "light", "airy", "open", "vast", "clear", "bright", "ethereal"]),
            ("earth", &["brown", "green", "natural", "soil", "ground", "organic", "warm", "stable"]),
            // Seasonal concepts
            ("spring", &["green", "pink", "fresh", "new", "growth", "light", "soft", "bloom"]),
            ("summer", &["yellow", "orange", "bright", "warm", "sunny", "vibrant", "hot", "intense"]),
            ("autumn", &["orange", "red", "gold", "brown", "warm", "rich", "harvest", "cozy"]),
            ("winter", &["white", "blue", "silver", "cold", "snow", "ice", "crisp", "pure"]),
            // Time concepts
            ("morning", &["light", "fresh", "bright", "new", "dawn", "clear", "soft", "awakening"]),
            ("noon", &["bright", "intense", "clear", "sharp", "vivid", "strong", "direct", "bold"]),
            ("evening", &["warm", "soft", "golden", "mellow", "gentle", "amber", "cozy", "intimate"]),
            ("night", &["dark", "deep", "black", "mysterious", "quiet", "shadow", "moonlight", "serene"]),
            ("midnight", &["black", "dark", "deep", "mysterious", "intense", "shadow", "void", "profound"]),
            // Style concepts
            ("vintage", &["sepia", "brown", "gold", "aged", "warm", "classic", "muted", "nostalgic"]),
            ("modern", &["clean", "bright", "sharp", "minimal", "clear", "fresh", "simple", "sleek"]),
            ("rustic", &["brown", "earth", "natural", "rough", "weathered", "organic", "raw", "textured"]),
            ("elegant", &["refined", "sophisticated", "subtle", "graceful", "polished", "classic", "timeless", "luxurious"]),
            ("cyberpunk", &["neon", "electric", "bright", "artificial", "synthetic", "digital", "futuristic", "intense"]),
            ("tropical", &["bright", "vivid", "warm", "exotic", "lush", "vibrant", "paradise", "colorful"]),
            // Texture/feeling concepts
            ("cozy", &["warm", "soft", "comfortable", "intimate", "gentle", "welcoming", "safe", "nurturing"]),
            ("luxurious", &["rich", "opulent", "gold", "deep", "expensive", "premium", "refined", "exclusive"]),
            ("minimalist", &["clean", "simple", "pure", "empty", "sparse", "essential", "basic", "uncluttered"]),
        ];

        self.concept_mappings = CONCEPTS
            .iter()
            .map(|(concept, related)| {
                let related: Vec<String> =
                    related.iter().map(|term| (*term).to_string()).collect();
                ((*concept).to_string(), related)
            })
            .collect();
    }

    /// Inserts the built-in colour and mood feature vectors into `word_vectors`.
    fn build_word_vectors(&mut self) {
        // Axes: Brightness, Warmth, Saturation, Intensity, Calmness,
        //       Naturalness, Sophistication, Energy
        const COLOR_VECTORS: &[(&str, [f32; EmbeddingGenerator::DIMENSIONS])] = &[
            ("red", [0.8, 0.9, 0.9, 0.9, 0.1, 0.3, 0.6, 0.9]),
            ("blue", [0.6, 0.1, 0.7, 0.6, 0.9, 0.8, 0.8, 0.3]),
            ("green", [0.5, 0.4, 0.6, 0.5, 0.8, 0.9, 0.7, 0.4]),
            ("yellow", [0.9, 0.8, 0.8, 0.8, 0.6, 0.6, 0.5, 0.8]),
            ("orange", [0.8, 0.9, 0.8, 0.8, 0.3, 0.5, 0.6, 0.9]),
            ("purple", [0.6, 0.3, 0.8, 0.7, 0.7, 0.2, 0.9, 0.5]),
            ("pink", [0.7, 0.6, 0.7, 0.6, 0.7, 0.2, 0.6, 0.6]),
            ("brown", [0.3, 0.6, 0.5, 0.4, 0.6, 0.9, 0.7, 0.2]),
            ("black", [0.0, 0.2, 0.0, 0.9, 0.4, 0.3, 0.9, 0.1]),
            ("white", [1.0, 0.5, 0.0, 0.2, 0.8, 0.5, 0.8, 0.2]),
            ("gray", [0.4, 0.3, 0.2, 0.3, 0.7, 0.4, 0.8, 0.2]),
            ("silver", [0.7, 0.3, 0.3, 0.5, 0.6, 0.2, 0.9, 0.3]),
            ("gold", [0.8, 0.8, 0.7, 0.7, 0.4, 0.3, 0.8, 0.6]),
            ("crimson", [0.7, 0.9, 0.9, 0.9, 0.1, 0.2, 0.8, 0.9]),
            ("emerald", [0.6, 0.3, 0.8, 0.7, 0.8, 0.8, 0.9, 0.5]),
            ("sapphire", [0.6, 0.1, 0.9, 0.8, 0.8, 0.5, 0.9, 0.4]),
            ("coral", [0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.6, 0.7]),
            ("turquoise", [0.7, 0.2, 0.8, 0.7, 0.8, 0.7, 0.7, 0.6]),
            ("lavender", [0.8, 0.4, 0.6, 0.5, 0.9, 0.3, 0.8, 0.3]),
            ("indigo", [0.4, 0.2, 0.8, 0.8, 0.7, 0.3, 0.9, 0.4]),
        ];

        // Mood/concept descriptors share the same eight axes.
        const MOOD_VECTORS: &[(&str, [f32; EmbeddingGenerator::DIMENSIONS])] = &[
            ("bright", [0.9, 0.6, 0.7, 0.8, 0.6, 0.5, 0.6, 0.8]),
            ("dark", [0.1, 0.3, 0.4, 0.7, 0.4, 0.4, 0.8, 0.3]),
            ("warm", [0.7, 0.9, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5]),
            ("cool", [0.6, 0.1, 0.6, 0.5, 0.8, 0.7, 0.8, 0.3]),
            ("vibrant", [0.8, 0.7, 0.9, 0.9, 0.4, 0.5, 0.6, 0.9]),
            ("muted", [0.4, 0.5, 0.3, 0.3, 0.8, 0.7, 0.8, 0.2]),
            ("intense", [0.7, 0.6, 0.9, 0.9, 0.2, 0.4, 0.7, 0.9]),
            ("soft", [0.6, 0.6, 0.4, 0.3, 0.9, 0.6, 0.7, 0.2]),
            ("natural", [0.5, 0.5, 0.5, 0.4, 0.7, 0.9, 0.6, 0.3]),
            ("artificial", [0.7, 0.4, 0.8, 0.7, 0.3, 0.1, 0.6, 0.8]),
        ];

        self.word_vectors.extend(
            COLOR_VECTORS
                .iter()
                .chain(MOOD_VECTORS.iter())
                .map(|(word, features)| ((*word).to_string(), features.to_vec())),
        );
    }

    /// Embeds every text in `texts`, returning one eight-component vector per
    /// input in the same order.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    pub fn generate_embeddings(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
        let embeddings: Vec<Vec<f32>> = texts
            .iter()
            .map(|text| self.text_to_vector(text))
            .collect();
        Ok(embeddings)
    }

    /// Adds `vector * weight` component-wise into `acc`.
    fn accumulate(acc: &mut [f32], vector: &[f32], weight: f32) {
        for (slot, &value) in acc.iter_mut().zip(vector) {
            *slot += value * weight;
        }
    }

    /// Converts one text into an eight-component vector with values in `[0, 1]`.
    ///
    /// The text is lower-cased and split on whitespace. Tokens of two bytes or
    /// fewer are dropped, then non-alphabetic characters are stripped from the
    /// remaining tokens. If no word survives, an all-zero vector is returned.
    /// Otherwise the result is a weighted average of dictionary vectors
    /// (direct hits, concept descriptors and similar substring matches), plus a
    /// small offset derived from a hash of the word list.
    fn text_to_vector(&self, text: &str) -> Vec<f32> {
        let words: Vec<String> = text
            .to_lowercase()
            .split_whitespace()
            // The length check runs on the raw token, before punctuation is removed.
            .filter(|token| token.len() > 2)
            .map(|token| {
                token
                    .chars()
                    .filter(|c| c.is_alphabetic())
                    .collect::<String>()
            })
            .filter(|word| !word.is_empty())
            .collect();

        if words.is_empty() {
            // Nothing to embed: zero vector, with no hash offset applied.
            return vec![0.0; Self::DIMENSIONS];
        }

        let mut combined = vec![0.0_f32; Self::DIMENSIONS];
        let mut weight_sum = 0.0_f32;

        for word in &words {
            // 1. The word is itself in the dictionary.
            if let Some(vector) = self.word_vectors.get(word) {
                Self::accumulate(&mut combined, vector, Self::DIRECT_WEIGHT);
                weight_sum += Self::DIRECT_WEIGHT;
            }

            // 2. The word names a concept: add every descriptor that has a vector.
            if let Some(related) = self.concept_mappings.get(word) {
                let described = related
                    .iter()
                    .filter_map(|term| self.word_vectors.get(term));
                for vector in described {
                    Self::accumulate(&mut combined, vector, Self::CONCEPT_WEIGHT);
                    weight_sum += Self::CONCEPT_WEIGHT;
                }
            }

            // 3. Dictionary words that contain, or are contained in, the word.
            // NOTE(review): an exact dictionary hit also passes this test with
            // similarity 1.0, so it is counted here in addition to step 1.
            // Kept as-is because changing it would alter existing embeddings.
            for (dict_word, vector) in &self.word_vectors {
                let overlaps = word.contains(dict_word.as_str())
                    || dict_word.contains(word.as_str());
                if !overlaps {
                    continue;
                }
                let similarity = self.string_similarity(word, dict_word);
                if similarity > Self::PARTIAL_THRESHOLD {
                    let weight = similarity * Self::PARTIAL_WEIGHT;
                    Self::accumulate(&mut combined, vector, weight);
                    weight_sum += weight;
                }
            }
        }

        // Turn the weighted sum into a weighted average.
        if weight_sum > 0.0 {
            for value in combined.iter_mut() {
                *value /= weight_sum;
            }
        }

        // Offset each component by a value in [-0.05, 0.05] derived from the
        // hash of the word list, so different word combinations that average to
        // the same vector still produce distinct embeddings.
        let combination_hash = self.hash_word_combination(&words);
        for (i, value) in combined.iter_mut().enumerate() {
            let unit = combination_hash.wrapping_mul((i + 1) as u64) as f32 / u64::MAX as f32;
            *value = (*value + (unit - 0.5) * Self::JITTER_SCALE).clamp(0.0, 1.0);
        }

        combined
    }

    /// Normalised Levenshtein similarity between `s1` and `s2`.
    ///
    /// Returns `1.0 - distance / max_len`, where both the edit distance and
    /// `max_len` are measured in characters (Unicode scalar values), not
    /// bytes, so non-ASCII words are compared correctly. Returns `0.0` when
    /// either string is empty.
    fn string_similarity(&self, s1: &str, s2: &str) -> f32 {
        let a: Vec<char> = s1.chars().collect();
        let b: Vec<char> = s2.chars().collect();
        if a.is_empty() || b.is_empty() {
            return 0.0;
        }

        // Two-row dynamic programme: `prev` holds the distances for the prefix
        // of `a` one character shorter than the row being filled in `curr`.
        let mut prev: Vec<usize> = (0..=b.len()).collect();
        let mut curr: Vec<usize> = vec![0; b.len() + 1];

        for (i, &ca) in a.iter().enumerate() {
            curr[0] = i + 1;
            for (j, &cb) in b.iter().enumerate() {
                let substitution = prev[j] + usize::from(ca != cb);
                let deletion = prev[j + 1] + 1;
                let insertion = curr[j] + 1;
                curr[j + 1] = deletion.min(insertion).min(substitution);
            }
            std::mem::swap(&mut prev, &mut curr);
        }

        let distance = prev[b.len()];
        let max_len = a.len().max(b.len());
        1.0 - distance as f32 / max_len as f32
    }

    /// Hashes the words, in order, with the standard library's `DefaultHasher`.
    fn hash_word_combination(&self, words: &[String]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        for word in words {
            word.hash(&mut hasher);
        }
        hasher.finish()
    }

    /// Cosine similarity of `a` and `b`.
    ///
    /// Returns `0.0` when the slices differ in length or when either has zero
    /// magnitude.
    pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }

        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if norm_a == 0.0 || norm_b == 0.0 {
            return 0.0;
        }

        dot_product / (norm_a * norm_b)
    }
}