// good_try.rs
// (listing header: 631 lines, 19 kB)
1use anyhow::Result; 2use std::collections::{HashMap, HashSet}; 3 4pub struct EmbeddingGenerator { 5 word_vectors: HashMap<String, Vec<f32>>, 6 concept_mappings: HashMap<String, Vec<String>>, 7} 8 9impl EmbeddingGenerator { 10 pub fn new() -> Result<Self> { 11 println!("🔄 Initializing advanced semantic matching system..."); 12 13 let mut generator = Self { 14 word_vectors: HashMap::new(), 15 concept_mappings: HashMap::new(), 16 }; 17 18 generator.build_concept_mappings(); 19 generator.build_word_vectors(); 20 21 println!( 22 "✅ Loaded {} semantic concepts and {} word vectors", 23 generator.concept_mappings.len(), 24 generator.word_vectors.len() 25 ); 26 27 Ok(generator) 28 } 29 30 fn build_concept_mappings(&mut self) { 31 let mut mappings = HashMap::new(); 32 33 // Emotional concepts 34 mappings.insert( 35 "happy".to_string(), 36 vec![ 37 "joy", "bright", "sunny", "cheerful", "golden", "vibrant", "warm", "light", 38 ] 39 .iter() 40 .map(|s| s.to_string()) 41 .collect(), 42 ); 43 mappings.insert( 44 "sad".to_string(), 45 vec![ 46 "blue", 47 "gray", 48 "dark", 49 "cold", 50 "muted", 51 "somber", 52 "dim", 53 "melancholy", 54 ] 55 .iter() 56 .map(|s| s.to_string()) 57 .collect(), 58 ); 59 mappings.insert( 60 "angry".to_string(), 61 vec![ 62 "red", "hot", "fiery", "intense", "burning", "scarlet", "crimson", "rage", 63 ] 64 .iter() 65 .map(|s| s.to_string()) 66 .collect(), 67 ); 68 mappings.insert( 69 "calm".to_string(), 70 vec![ 71 "blue", "peaceful", "serene", "cool", "tranquil", "soft", "gentle", "quiet", 72 ] 73 .iter() 74 .map(|s| s.to_string()) 75 .collect(), 76 ); 77 mappings.insert( 78 "energetic".to_string(), 79 vec![ 80 "bright", "vivid", "electric", "neon", "bold", "dynamic", "vibrant", "intense", 81 ] 82 .iter() 83 .map(|s| s.to_string()) 84 .collect(), 85 ); 86 mappings.insert( 87 "mysterious".to_string(), 88 vec![ 89 "dark", 90 "deep", 91 "purple", 92 "black", 93 "shadow", 94 "midnight", 95 "enigmatic", 96 "unknown", 97 ] 98 .iter() 99 .map(|s| 
s.to_string()) 100 .collect(), 101 ); 102 mappings.insert( 103 "romantic".to_string(), 104 vec![ 105 "pink", "rose", "soft", "warm", "tender", "blush", "coral", "intimate", 106 ] 107 .iter() 108 .map(|s| s.to_string()) 109 .collect(), 110 ); 111 mappings.insert( 112 "peaceful".to_string(), 113 vec![ 114 "green", "blue", "soft", "calm", "nature", "harmony", "balance", "zen", 115 ] 116 .iter() 117 .map(|s| s.to_string()) 118 .collect(), 119 ); 120 121 // Nature concepts 122 mappings.insert( 123 "ocean".to_string(), 124 vec![ 125 "blue", 126 "teal", 127 "cyan", 128 "aqua", 129 "turquoise", 130 "deep", 131 "wave", 132 "water", 133 ] 134 .iter() 135 .map(|s| s.to_string()) 136 .collect(), 137 ); 138 mappings.insert( 139 "forest".to_string(), 140 vec![ 141 "green", "brown", "earth", "natural", "deep", "emerald", "leaf", "tree", 142 ] 143 .iter() 144 .map(|s| s.to_string()) 145 .collect(), 146 ); 147 mappings.insert( 148 "sunset".to_string(), 149 vec![ 150 "orange", "red", "gold", "warm", "coral", "amber", "glow", "horizon", 151 ] 152 .iter() 153 .map(|s| s.to_string()) 154 .collect(), 155 ); 156 mappings.insert( 157 "sunrise".to_string(), 158 vec![ 159 "yellow", "orange", "gold", "bright", "dawn", "light", "morning", "new", 160 ] 161 .iter() 162 .map(|s| s.to_string()) 163 .collect(), 164 ); 165 mappings.insert( 166 "fire".to_string(), 167 vec![ 168 "red", "orange", "yellow", "hot", "bright", "flame", "burn", "heat", 169 ] 170 .iter() 171 .map(|s| s.to_string()) 172 .collect(), 173 ); 174 mappings.insert( 175 "ice".to_string(), 176 vec![ 177 "white", "blue", "silver", "cold", "crystal", "frozen", "clear", "pale", 178 ] 179 .iter() 180 .map(|s| s.to_string()) 181 .collect(), 182 ); 183 mappings.insert( 184 "sky".to_string(), 185 vec![ 186 "blue", "light", "airy", "open", "vast", "clear", "bright", "ethereal", 187 ] 188 .iter() 189 .map(|s| s.to_string()) 190 .collect(), 191 ); 192 mappings.insert( 193 "earth".to_string(), 194 vec![ 195 "brown", "green", "natural", "soil", 
"ground", "organic", "warm", "stable", 196 ] 197 .iter() 198 .map(|s| s.to_string()) 199 .collect(), 200 ); 201 202 // Seasonal concepts 203 mappings.insert( 204 "spring".to_string(), 205 vec![ 206 "green", "pink", "fresh", "new", "growth", "light", "soft", "bloom", 207 ] 208 .iter() 209 .map(|s| s.to_string()) 210 .collect(), 211 ); 212 mappings.insert( 213 "summer".to_string(), 214 vec![ 215 "yellow", "orange", "bright", "warm", "sunny", "vibrant", "hot", "intense", 216 ] 217 .iter() 218 .map(|s| s.to_string()) 219 .collect(), 220 ); 221 mappings.insert( 222 "autumn".to_string(), 223 vec![ 224 "orange", "red", "gold", "brown", "warm", "rich", "harvest", "cozy", 225 ] 226 .iter() 227 .map(|s| s.to_string()) 228 .collect(), 229 ); 230 mappings.insert( 231 "winter".to_string(), 232 vec![ 233 "white", "blue", "silver", "cold", "snow", "ice", "crisp", "pure", 234 ] 235 .iter() 236 .map(|s| s.to_string()) 237 .collect(), 238 ); 239 240 // Time concepts 241 mappings.insert( 242 "morning".to_string(), 243 vec![ 244 "light", 245 "fresh", 246 "bright", 247 "new", 248 "dawn", 249 "clear", 250 "soft", 251 "awakening", 252 ] 253 .iter() 254 .map(|s| s.to_string()) 255 .collect(), 256 ); 257 mappings.insert( 258 "noon".to_string(), 259 vec![ 260 "bright", "intense", "clear", "sharp", "vivid", "strong", "direct", "bold", 261 ] 262 .iter() 263 .map(|s| s.to_string()) 264 .collect(), 265 ); 266 mappings.insert( 267 "evening".to_string(), 268 vec![ 269 "warm", "soft", "golden", "mellow", "gentle", "amber", "cozy", "intimate", 270 ] 271 .iter() 272 .map(|s| s.to_string()) 273 .collect(), 274 ); 275 mappings.insert( 276 "night".to_string(), 277 vec![ 278 "dark", 279 "deep", 280 "black", 281 "mysterious", 282 "quiet", 283 "shadow", 284 "moonlight", 285 "serene", 286 ] 287 .iter() 288 .map(|s| s.to_string()) 289 .collect(), 290 ); 291 mappings.insert( 292 "midnight".to_string(), 293 vec![ 294 "black", 295 "dark", 296 "deep", 297 "mysterious", 298 "intense", 299 "shadow", 300 "void", 
301 "profound", 302 ] 303 .iter() 304 .map(|s| s.to_string()) 305 .collect(), 306 ); 307 308 // Style concepts 309 mappings.insert( 310 "vintage".to_string(), 311 vec![ 312 "sepia", 313 "brown", 314 "gold", 315 "aged", 316 "warm", 317 "classic", 318 "muted", 319 "nostalgic", 320 ] 321 .iter() 322 .map(|s| s.to_string()) 323 .collect(), 324 ); 325 mappings.insert( 326 "modern".to_string(), 327 vec![ 328 "clean", "bright", "sharp", "minimal", "clear", "fresh", "simple", "sleek", 329 ] 330 .iter() 331 .map(|s| s.to_string()) 332 .collect(), 333 ); 334 mappings.insert( 335 "rustic".to_string(), 336 vec![ 337 "brown", 338 "earth", 339 "natural", 340 "rough", 341 "weathered", 342 "organic", 343 "raw", 344 "textured", 345 ] 346 .iter() 347 .map(|s| s.to_string()) 348 .collect(), 349 ); 350 mappings.insert( 351 "elegant".to_string(), 352 vec![ 353 "refined", 354 "sophisticated", 355 "subtle", 356 "graceful", 357 "polished", 358 "classic", 359 "timeless", 360 "luxurious", 361 ] 362 .iter() 363 .map(|s| s.to_string()) 364 .collect(), 365 ); 366 mappings.insert( 367 "cyberpunk".to_string(), 368 vec![ 369 "neon", 370 "electric", 371 "bright", 372 "artificial", 373 "synthetic", 374 "digital", 375 "futuristic", 376 "intense", 377 ] 378 .iter() 379 .map(|s| s.to_string()) 380 .collect(), 381 ); 382 mappings.insert( 383 "tropical".to_string(), 384 vec![ 385 "bright", "vivid", "warm", "exotic", "lush", "vibrant", "paradise", "colorful", 386 ] 387 .iter() 388 .map(|s| s.to_string()) 389 .collect(), 390 ); 391 392 // Texture/feeling concepts 393 mappings.insert( 394 "cozy".to_string(), 395 vec![ 396 "warm", 397 "soft", 398 "comfortable", 399 "intimate", 400 "gentle", 401 "welcoming", 402 "safe", 403 "nurturing", 404 ] 405 .iter() 406 .map(|s| s.to_string()) 407 .collect(), 408 ); 409 mappings.insert( 410 "luxurious".to_string(), 411 vec![ 412 "rich", 413 "opulent", 414 "gold", 415 "deep", 416 "expensive", 417 "premium", 418 "refined", 419 "exclusive", 420 ] 421 .iter() 422 .map(|s| 
s.to_string()) 423 .collect(), 424 ); 425 mappings.insert( 426 "minimalist".to_string(), 427 vec![ 428 "clean", 429 "simple", 430 "pure", 431 "empty", 432 "sparse", 433 "essential", 434 "basic", 435 "uncluttered", 436 ] 437 .iter() 438 .map(|s| s.to_string()) 439 .collect(), 440 ); 441 442 self.concept_mappings = mappings; 443 } 444 445 fn build_word_vectors(&mut self) { 446 // Create simple but effective word vectors based on color characteristics 447 let color_characteristics = vec![ 448 // Brightness, Warmth, Saturation, Intensity, Calmness, Naturalness, Sophistication, Energy 449 ("red", vec![0.8, 0.9, 0.9, 0.9, 0.1, 0.3, 0.6, 0.9]), 450 ("blue", vec![0.6, 0.1, 0.7, 0.6, 0.9, 0.8, 0.8, 0.3]), 451 ("green", vec![0.5, 0.4, 0.6, 0.5, 0.8, 0.9, 0.7, 0.4]), 452 ("yellow", vec![0.9, 0.8, 0.8, 0.8, 0.6, 0.6, 0.5, 0.8]), 453 ("orange", vec![0.8, 0.9, 0.8, 0.8, 0.3, 0.5, 0.6, 0.9]), 454 ("purple", vec![0.6, 0.3, 0.8, 0.7, 0.7, 0.2, 0.9, 0.5]), 455 ("pink", vec![0.7, 0.6, 0.7, 0.6, 0.7, 0.2, 0.6, 0.6]), 456 ("brown", vec![0.3, 0.6, 0.5, 0.4, 0.6, 0.9, 0.7, 0.2]), 457 ("black", vec![0.0, 0.2, 0.0, 0.9, 0.4, 0.3, 0.9, 0.1]), 458 ("white", vec![1.0, 0.5, 0.0, 0.2, 0.8, 0.5, 0.8, 0.2]), 459 ("gray", vec![0.4, 0.3, 0.2, 0.3, 0.7, 0.4, 0.8, 0.2]), 460 ("silver", vec![0.7, 0.3, 0.3, 0.5, 0.6, 0.2, 0.9, 0.3]), 461 ("gold", vec![0.8, 0.8, 0.7, 0.7, 0.4, 0.3, 0.8, 0.6]), 462 ("crimson", vec![0.7, 0.9, 0.9, 0.9, 0.1, 0.2, 0.8, 0.9]), 463 ("emerald", vec![0.6, 0.3, 0.8, 0.7, 0.8, 0.8, 0.9, 0.5]), 464 ("sapphire", vec![0.6, 0.1, 0.9, 0.8, 0.8, 0.5, 0.9, 0.4]), 465 ("coral", vec![0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.6, 0.7]), 466 ("turquoise", vec![0.7, 0.2, 0.8, 0.7, 0.8, 0.7, 0.7, 0.6]), 467 ("lavender", vec![0.8, 0.4, 0.6, 0.5, 0.9, 0.3, 0.8, 0.3]), 468 ("indigo", vec![0.4, 0.2, 0.8, 0.8, 0.7, 0.3, 0.9, 0.4]), 469 ]; 470 471 for (word, vector) in color_characteristics { 472 self.word_vectors.insert(word.to_string(), vector); 473 } 474 475 // Add mood/concept vectors 476 let 
concept_vectors = vec![ 477 ("bright", vec![0.9, 0.6, 0.7, 0.8, 0.6, 0.5, 0.6, 0.8]), 478 ("dark", vec![0.1, 0.3, 0.4, 0.7, 0.4, 0.4, 0.8, 0.3]), 479 ("warm", vec![0.7, 0.9, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5]), 480 ("cool", vec![0.6, 0.1, 0.6, 0.5, 0.8, 0.7, 0.8, 0.3]), 481 ("vibrant", vec![0.8, 0.7, 0.9, 0.9, 0.4, 0.5, 0.6, 0.9]), 482 ("muted", vec![0.4, 0.5, 0.3, 0.3, 0.8, 0.7, 0.8, 0.2]), 483 ("intense", vec![0.7, 0.6, 0.9, 0.9, 0.2, 0.4, 0.7, 0.9]), 484 ("soft", vec![0.6, 0.6, 0.4, 0.3, 0.9, 0.6, 0.7, 0.2]), 485 ("natural", vec![0.5, 0.5, 0.5, 0.4, 0.7, 0.9, 0.6, 0.3]), 486 ("artificial", vec![0.7, 0.4, 0.8, 0.7, 0.3, 0.1, 0.6, 0.8]), 487 ]; 488 489 for (word, vector) in concept_vectors { 490 self.word_vectors.insert(word.to_string(), vector); 491 } 492 } 493 494 pub fn generate_embeddings(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> { 495 let embeddings: Vec<Vec<f32>> = 496 texts.iter().map(|text| self.text_to_vector(text)).collect(); 497 498 Ok(embeddings) 499 } 500 501 fn text_to_vector(&self, text: &str) -> Vec<f32> { 502 let words: Vec<String> = text 503 .to_lowercase() 504 .split_whitespace() 505 .filter(|word| word.len() > 2) 506 .map(|word| word.chars().filter(|c| c.is_alphabetic()).collect()) 507 .filter(|word: &String| !word.is_empty()) 508 .collect(); 509 510 if words.is_empty() { 511 return vec![0.0; 8]; // Return zero vector for empty input 512 } 513 514 let mut combined_vector = vec![0.0; 8]; 515 let mut weight_sum = 0.0; 516 517 for word in &words { 518 // Direct word vector lookup 519 if let Some(vector) = self.word_vectors.get(word) { 520 for (i, &value) in vector.iter().enumerate() { 521 combined_vector[i] += value * 2.0; // Higher weight for direct matches 522 } 523 weight_sum += 2.0; 524 } 525 526 // Concept mapping lookup 527 if let Some(related_concepts) = self.concept_mappings.get(word) { 528 for concept in related_concepts { 529 if let Some(vector) = self.word_vectors.get(concept) { 530 for (i, &value) in vector.iter().enumerate() { 531 
combined_vector[i] += value * 1.5; // Medium weight for concept matches 532 } 533 weight_sum += 1.5; 534 } 535 } 536 } 537 538 // Substring matching for partial word matches 539 for (dict_word, vector) in &self.word_vectors { 540 if word.contains(dict_word) || dict_word.contains(word) { 541 let similarity = self.string_similarity(word, dict_word); 542 if similarity > 0.6 { 543 for (i, &value) in vector.iter().enumerate() { 544 combined_vector[i] += value * similarity * 0.8; // Lower weight for partial matches 545 } 546 weight_sum += similarity * 0.8; 547 } 548 } 549 } 550 } 551 552 // Normalize the vector 553 if weight_sum > 0.0 { 554 for value in combined_vector.iter_mut() { 555 *value /= weight_sum; 556 } 557 } 558 559 // Add some randomness based on unique word combinations to create more diverse embeddings 560 let word_combination_hash = self.hash_word_combination(&words); 561 for (i, value) in combined_vector.iter_mut().enumerate() { 562 *value += (word_combination_hash.wrapping_mul((i + 1) as u64) as f32 / u64::MAX as f32 563 - 0.5) 564 * 0.1; 565 *value = value.max(0.0).min(1.0); // Clamp to [0, 1] 566 } 567 568 combined_vector 569 } 570 571 fn string_similarity(&self, s1: &str, s2: &str) -> f32 { 572 let len1 = s1.len(); 573 let len2 = s2.len(); 574 575 if len1 == 0 || len2 == 0 { 576 return 0.0; 577 } 578 579 let mut matrix = vec![vec![0; len2 + 1]; len1 + 1]; 580 581 for i in 0..=len1 { 582 matrix[i][0] = i; 583 } 584 for j in 0..=len2 { 585 matrix[0][j] = j; 586 } 587 588 for i in 1..=len1 { 589 for j in 1..=len2 { 590 let cost = if s1.chars().nth(i - 1) == s2.chars().nth(j - 1) { 591 0 592 } else { 593 1 594 }; 595 matrix[i][j] = (matrix[i - 1][j] + 1) 596 .min(matrix[i][j - 1] + 1) 597 .min(matrix[i - 1][j - 1] + cost); 598 } 599 } 600 601 let max_len = len1.max(len2); 602 1.0 - (matrix[len1][len2] as f32 / max_len as f32) 603 } 604 605 fn hash_word_combination(&self, words: &[String]) -> u64 { 606 use std::collections::hash_map::DefaultHasher; 607 use 
std::hash::{Hash, Hasher}; 608 609 let mut hasher = DefaultHasher::new(); 610 for word in words { 611 word.hash(&mut hasher); 612 } 613 hasher.finish() 614 } 615 616 pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { 617 if a.len() != b.len() { 618 return 0.0; 619 } 620 621 let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); 622 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt(); 623 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt(); 624 625 if norm_a == 0.0 || norm_b == 0.0 { 626 return 0.0; 627 } 628 629 dot_product / (norm_a * norm_b) 630 } 631}