good_try.rs
1use anyhow::Result;
2use std::collections::{HashMap, HashSet};
3
/// Rule-based text embedder backed by a small, hand-written vocabulary.
///
/// Each embedding has eight components. The vocabulary vectors are annotated in
/// the source table as: brightness, warmth, saturation, intensity, calmness,
/// naturalness, sophistication, energy.
pub struct EmbeddingGenerator {
    /// Feature vector for every vocabulary word, keyed by the lowercase word.
    word_vectors: HashMap<String, Vec<f32>>,
    /// Related words for each concept keyword, keyed by the lowercase keyword.
    /// Only related words that also appear in `word_vectors` influence an embedding.
    concept_mappings: HashMap<String, Vec<String>>,
}

impl EmbeddingGenerator {
    /// Number of components in every word vector and every embedding.
    const DIMENSIONS: usize = 8;
    /// Minimum number of characters a cleaned word needs in order to be considered.
    const MIN_WORD_CHARS: usize = 3;
    /// Weight of a word that is itself a vocabulary entry.
    const DIRECT_WEIGHT: f32 = 2.0;
    /// Weight of each vocabulary word reached through a concept keyword.
    const CONCEPT_WEIGHT: f32 = 1.5;
    /// Base weight of a partial (substring) match; scaled by the string similarity.
    const PARTIAL_WEIGHT: f32 = 0.8;
    /// A partial match only counts when its similarity is strictly above this value.
    const PARTIAL_THRESHOLD: f32 = 0.6;
    /// Total width of the hash-derived jitter added to every component.
    const JITTER_SCALE: f32 = 0.1;

    /// Builds a generator with the built-in concept table and vocabulary.
    ///
    /// Prints one progress line before and one summary line after the tables are
    /// built, both to standard output.
    ///
    /// # Errors
    ///
    /// The current implementation has no failure path and always returns `Ok`.
    pub fn new() -> Result<Self> {
        println!("🔄 Initializing advanced semantic matching system...");

        let mut generator = Self {
            word_vectors: HashMap::new(),
            concept_mappings: HashMap::new(),
        };

        generator.build_concept_mappings();
        generator.build_word_vectors();

        println!(
            "✅ Loaded {} semantic concepts and {} word vectors",
            generator.concept_mappings.len(),
            generator.word_vectors.len()
        );

        Ok(generator)
    }

    /// Replaces `concept_mappings` with the built-in keyword → related-words table.
    fn build_concept_mappings(&mut self) {
        // One static table instead of a temporary `Vec` per entry.
        const CONCEPTS: &[(&str, &[&str])] = &[
            // Emotional concepts
            (
                "happy",
                &["joy", "bright", "sunny", "cheerful", "golden", "vibrant", "warm", "light"],
            ),
            (
                "sad",
                &["blue", "gray", "dark", "cold", "muted", "somber", "dim", "melancholy"],
            ),
            (
                "angry",
                &["red", "hot", "fiery", "intense", "burning", "scarlet", "crimson", "rage"],
            ),
            (
                "calm",
                &["blue", "peaceful", "serene", "cool", "tranquil", "soft", "gentle", "quiet"],
            ),
            (
                "energetic",
                &["bright", "vivid", "electric", "neon", "bold", "dynamic", "vibrant", "intense"],
            ),
            (
                "mysterious",
                &[
                    "dark", "deep", "purple", "black", "shadow", "midnight", "enigmatic",
                    "unknown",
                ],
            ),
            (
                "romantic",
                &["pink", "rose", "soft", "warm", "tender", "blush", "coral", "intimate"],
            ),
            (
                "peaceful",
                &["green", "blue", "soft", "calm", "nature", "harmony", "balance", "zen"],
            ),
            // Nature concepts
            (
                "ocean",
                &["blue", "teal", "cyan", "aqua", "turquoise", "deep", "wave", "water"],
            ),
            (
                "forest",
                &["green", "brown", "earth", "natural", "deep", "emerald", "leaf", "tree"],
            ),
            (
                "sunset",
                &["orange", "red", "gold", "warm", "coral", "amber", "glow", "horizon"],
            ),
            (
                "sunrise",
                &["yellow", "orange", "gold", "bright", "dawn", "light", "morning", "new"],
            ),
            (
                "fire",
                &["red", "orange", "yellow", "hot", "bright", "flame", "burn", "heat"],
            ),
            (
                "ice",
                &["white", "blue", "silver", "cold", "crystal", "frozen", "clear", "pale"],
            ),
            (
                "sky",
                &["blue", "light", "airy", "open", "vast", "clear", "bright", "ethereal"],
            ),
            (
                "earth",
                &["brown", "green", "natural", "soil", "ground", "organic", "warm", "stable"],
            ),
            // Seasonal concepts
            (
                "spring",
                &["green", "pink", "fresh", "new", "growth", "light", "soft", "bloom"],
            ),
            (
                "summer",
                &["yellow", "orange", "bright", "warm", "sunny", "vibrant", "hot", "intense"],
            ),
            (
                "autumn",
                &["orange", "red", "gold", "brown", "warm", "rich", "harvest", "cozy"],
            ),
            (
                "winter",
                &["white", "blue", "silver", "cold", "snow", "ice", "crisp", "pure"],
            ),
            // Time concepts
            (
                "morning",
                &["light", "fresh", "bright", "new", "dawn", "clear", "soft", "awakening"],
            ),
            (
                "noon",
                &["bright", "intense", "clear", "sharp", "vivid", "strong", "direct", "bold"],
            ),
            (
                "evening",
                &["warm", "soft", "golden", "mellow", "gentle", "amber", "cozy", "intimate"],
            ),
            (
                "night",
                &[
                    "dark", "deep", "black", "mysterious", "quiet", "shadow", "moonlight",
                    "serene",
                ],
            ),
            (
                "midnight",
                &[
                    "black", "dark", "deep", "mysterious", "intense", "shadow", "void",
                    "profound",
                ],
            ),
            // Style concepts
            (
                "vintage",
                &["sepia", "brown", "gold", "aged", "warm", "classic", "muted", "nostalgic"],
            ),
            (
                "modern",
                &["clean", "bright", "sharp", "minimal", "clear", "fresh", "simple", "sleek"],
            ),
            (
                "rustic",
                &[
                    "brown", "earth", "natural", "rough", "weathered", "organic", "raw",
                    "textured",
                ],
            ),
            (
                "elegant",
                &[
                    "refined", "sophisticated", "subtle", "graceful", "polished", "classic",
                    "timeless", "luxurious",
                ],
            ),
            (
                "cyberpunk",
                &[
                    "neon", "electric", "bright", "artificial", "synthetic", "digital",
                    "futuristic", "intense",
                ],
            ),
            (
                "tropical",
                &["bright", "vivid", "warm", "exotic", "lush", "vibrant", "paradise", "colorful"],
            ),
            // Texture/feeling concepts
            (
                "cozy",
                &[
                    "warm", "soft", "comfortable", "intimate", "gentle", "welcoming", "safe",
                    "nurturing",
                ],
            ),
            (
                "luxurious",
                &[
                    "rich", "opulent", "gold", "deep", "expensive", "premium", "refined",
                    "exclusive",
                ],
            ),
            (
                "minimalist",
                &[
                    "clean", "simple", "pure", "empty", "sparse", "essential", "basic",
                    "uncluttered",
                ],
            ),
        ];

        self.concept_mappings = CONCEPTS
            .iter()
            .map(|&(concept, related)| {
                (
                    concept.to_string(),
                    related.iter().map(|word| word.to_string()).collect(),
                )
            })
            .collect();
    }

    /// Inserts the built-in vocabulary into `word_vectors`.
    ///
    /// Existing entries under other keys are kept; an entry under the same key is
    /// overwritten.
    fn build_word_vectors(&mut self) {
        // Components, in order:
        // Brightness, Warmth, Saturation, Intensity, Calmness, Naturalness, Sophistication, Energy
        const VOCABULARY: &[(&str, [f32; EmbeddingGenerator::DIMENSIONS])] = &[
            // Colour words
            ("red", [0.8, 0.9, 0.9, 0.9, 0.1, 0.3, 0.6, 0.9]),
            ("blue", [0.6, 0.1, 0.7, 0.6, 0.9, 0.8, 0.8, 0.3]),
            ("green", [0.5, 0.4, 0.6, 0.5, 0.8, 0.9, 0.7, 0.4]),
            ("yellow", [0.9, 0.8, 0.8, 0.8, 0.6, 0.6, 0.5, 0.8]),
            ("orange", [0.8, 0.9, 0.8, 0.8, 0.3, 0.5, 0.6, 0.9]),
            ("purple", [0.6, 0.3, 0.8, 0.7, 0.7, 0.2, 0.9, 0.5]),
            ("pink", [0.7, 0.6, 0.7, 0.6, 0.7, 0.2, 0.6, 0.6]),
            ("brown", [0.3, 0.6, 0.5, 0.4, 0.6, 0.9, 0.7, 0.2]),
            ("black", [0.0, 0.2, 0.0, 0.9, 0.4, 0.3, 0.9, 0.1]),
            ("white", [1.0, 0.5, 0.0, 0.2, 0.8, 0.5, 0.8, 0.2]),
            ("gray", [0.4, 0.3, 0.2, 0.3, 0.7, 0.4, 0.8, 0.2]),
            ("silver", [0.7, 0.3, 0.3, 0.5, 0.6, 0.2, 0.9, 0.3]),
            ("gold", [0.8, 0.8, 0.7, 0.7, 0.4, 0.3, 0.8, 0.6]),
            ("crimson", [0.7, 0.9, 0.9, 0.9, 0.1, 0.2, 0.8, 0.9]),
            ("emerald", [0.6, 0.3, 0.8, 0.7, 0.8, 0.8, 0.9, 0.5]),
            ("sapphire", [0.6, 0.1, 0.9, 0.8, 0.8, 0.5, 0.9, 0.4]),
            ("coral", [0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.6, 0.7]),
            ("turquoise", [0.7, 0.2, 0.8, 0.7, 0.8, 0.7, 0.7, 0.6]),
            ("lavender", [0.8, 0.4, 0.6, 0.5, 0.9, 0.3, 0.8, 0.3]),
            ("indigo", [0.4, 0.2, 0.8, 0.8, 0.7, 0.3, 0.9, 0.4]),
            // Mood/concept words
            ("bright", [0.9, 0.6, 0.7, 0.8, 0.6, 0.5, 0.6, 0.8]),
            ("dark", [0.1, 0.3, 0.4, 0.7, 0.4, 0.4, 0.8, 0.3]),
            ("warm", [0.7, 0.9, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5]),
            ("cool", [0.6, 0.1, 0.6, 0.5, 0.8, 0.7, 0.8, 0.3]),
            ("vibrant", [0.8, 0.7, 0.9, 0.9, 0.4, 0.5, 0.6, 0.9]),
            ("muted", [0.4, 0.5, 0.3, 0.3, 0.8, 0.7, 0.8, 0.2]),
            ("intense", [0.7, 0.6, 0.9, 0.9, 0.2, 0.4, 0.7, 0.9]),
            ("soft", [0.6, 0.6, 0.4, 0.3, 0.9, 0.6, 0.7, 0.2]),
            ("natural", [0.5, 0.5, 0.5, 0.4, 0.7, 0.9, 0.6, 0.3]),
            ("artificial", [0.7, 0.4, 0.8, 0.7, 0.3, 0.1, 0.6, 0.8]),
        ];

        self.word_vectors.extend(
            VOCABULARY
                .iter()
                .map(|&(word, features)| (word.to_string(), features.to_vec())),
        );
    }

    /// Computes one embedding per input text, in input order.
    ///
    /// # Errors
    ///
    /// The current implementation has no failure path and always returns `Ok`.
    pub fn generate_embeddings(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
        Ok(texts
            .iter()
            .map(|text| self.text_to_vector(text))
            .collect())
    }

    /// Splits `text` into lowercase, letters-only words of at least
    /// `MIN_WORD_CHARS` characters.
    ///
    /// The length check runs after non-alphabetic characters are removed and counts
    /// characters rather than bytes, so `"re,"` and `"re"` are treated alike.
    fn tokenize(text: &str) -> Vec<String> {
        text.to_lowercase()
            .split_whitespace()
            .map(|token| {
                token
                    .chars()
                    .filter(|c| c.is_alphabetic())
                    .collect::<String>()
            })
            .filter(|word| word.chars().count() >= Self::MIN_WORD_CHARS)
            .collect()
    }

    /// Adds `vector * weight` to `total`, component by component.
    fn accumulate(total: &mut [f32], vector: &[f32], weight: f32) {
        for (slot, &component) in total.iter_mut().zip(vector) {
            *slot += component * weight;
        }
    }

    /// Turns one text into an embedding with every component in `[0, 1]`.
    ///
    /// Each word contributes through up to three tiers:
    /// 1. the word is a vocabulary entry (`DIRECT_WEIGHT`);
    /// 2. the word is a concept keyword, in which case each related word that is a
    ///    vocabulary entry contributes (`CONCEPT_WEIGHT`);
    /// 3. the word contains, or is contained in, a *different* vocabulary entry and
    ///    their similarity exceeds `PARTIAL_THRESHOLD`
    ///    (`similarity * PARTIAL_WEIGHT`).
    ///
    /// The weighted sum is divided by the total weight, then a small offset derived
    /// from a hash of the word list is added and the result is clamped.
    ///
    /// Text with no usable words returns the all-zero vector. Text whose words match
    /// nothing in any tier still receives the hash offset, so it is generally not
    /// all-zero.
    fn text_to_vector(&self, text: &str) -> Vec<f32> {
        let words = Self::tokenize(text);
        if words.is_empty() {
            return vec![0.0; Self::DIMENSIONS];
        }

        let mut combined = vec![0.0f32; Self::DIMENSIONS];
        let mut weight_sum = 0.0f32;

        for word in &words {
            // Tier 1: exact vocabulary hit.
            if let Some(vector) = self.word_vectors.get(word) {
                Self::accumulate(&mut combined, vector, Self::DIRECT_WEIGHT);
                weight_sum += Self::DIRECT_WEIGHT;
            }

            // Tier 2: concept keyword expanding to related vocabulary words.
            if let Some(related_words) = self.concept_mappings.get(word) {
                for vector in related_words
                    .iter()
                    .filter_map(|related| self.word_vectors.get(related))
                {
                    Self::accumulate(&mut combined, vector, Self::CONCEPT_WEIGHT);
                    weight_sum += Self::CONCEPT_WEIGHT;
                }
            }

            // Tier 3: substring relationship with another vocabulary word.
            for (dict_word, vector) in &self.word_vectors {
                // An exact hit was already counted in tier 1; counting it again here
                // would silently raise its weight.
                if dict_word == word {
                    continue;
                }
                if word.contains(dict_word.as_str()) || dict_word.contains(word.as_str()) {
                    let similarity = self.string_similarity(word, dict_word);
                    if similarity > Self::PARTIAL_THRESHOLD {
                        let weight = similarity * Self::PARTIAL_WEIGHT;
                        Self::accumulate(&mut combined, vector, weight);
                        weight_sum += weight;
                    }
                }
            }
        }

        // Weighted average; skipped when nothing matched to avoid dividing by zero.
        if weight_sum > 0.0 {
            for value in combined.iter_mut() {
                *value /= weight_sum;
            }
        }

        // Per-component offset in [-JITTER_SCALE / 2, JITTER_SCALE / 2] so that
        // different word combinations do not collapse onto the same vector.
        let combination_hash = self.hash_word_combination(&words);
        for (i, value) in combined.iter_mut().enumerate() {
            let unit = combination_hash.wrapping_mul((i + 1) as u64) as f32 / u64::MAX as f32;
            *value = (*value + (unit - 0.5) * Self::JITTER_SCALE)
                .max(0.0)
                .min(1.0);
        }

        combined
    }

    /// Returns `1 - levenshtein(s1, s2) / max(len(s1), len(s2))`, measured in
    /// characters.
    ///
    /// Identical strings score `1.0`. If either string is empty the score is `0.0`.
    fn string_similarity(&self, s1: &str, s2: &str) -> f32 {
        // Work on characters throughout: byte lengths differ from character counts
        // for non-ASCII text and would distort both the distance and the divisor.
        let left: Vec<char> = s1.chars().collect();
        let right: Vec<char> = s2.chars().collect();

        if left.is_empty() || right.is_empty() {
            return 0.0;
        }

        // Two-row Levenshtein: `previous` is the row for the prefix of `left` seen so
        // far, `current` is the row being filled.
        let mut previous: Vec<usize> = (0..=right.len()).collect();
        let mut current = vec![0usize; right.len() + 1];

        for (i, &left_char) in left.iter().enumerate() {
            current[0] = i + 1;
            for (j, &right_char) in right.iter().enumerate() {
                let substitution = previous[j] + usize::from(left_char != right_char);
                let deletion = previous[j + 1] + 1;
                let insertion = current[j] + 1;
                current[j + 1] = substitution.min(deletion).min(insertion);
            }
            std::mem::swap(&mut previous, &mut current);
        }

        let distance = previous[right.len()];
        let longest = left.len().max(right.len());
        1.0 - distance as f32 / longest as f32
    }

    /// Hashes the words, in order, into a single `u64` using the standard library's
    /// `DefaultHasher`.
    ///
    /// NOTE(review): the standard library does not promise that `DefaultHasher`
    /// output stays the same across Rust releases, so the jitter derived from this
    /// value may change after a toolchain upgrade.
    fn hash_word_combination(&self, words: &[String]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        for word in words {
            word.hash(&mut hasher);
        }
        hasher.finish()
    }

    /// Cosine of the angle between `a` and `b`.
    ///
    /// Returns `0.0` when the slices differ in length or when either has zero norm.
    pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }

        let dot_product: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
        let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if norm_a == 0.0 || norm_b == 0.0 {
            0.0
        } else {
            dot_product / (norm_a * norm_b)
        }
    }
}