A tool to scrobble tracks from your Apple Music data export to Teal.fm.
at main 13 kB view raw
/*
  A large portion of the recording/release determination, search query parsing, and
  caching was done by AI. So excuse the mess lol, I had to keep asking Claude for fixes
  after runs failed in the middle due to a bad search query.
*/
import {
  type IArtistCredit,
  type IRecording,
  type IRecordingMatch,
  type IRelease,
  MusicBrainzApi,
} from "musicbrainz-api";
import { env } from "../env";
import { join } from "path";
import crypto from "crypto";
import { createStorage } from "unstorage";
import fsDriver from "unstorage/drivers/fs";

/** File-system backed cache rooted at `<cwd>/cache/musicbrainz`. */
const storage = createStorage({
  driver: fsDriver({ base: join(process.cwd(), "cache", "musicbrainz") }),
});

/**
 * Returns the cached value for `key`, or runs `fn`, stores its result and
 * returns it.
 *
 * The storage key is the SHA-256 hex digest of `key`, so arbitrary artist or
 * track names never end up in a file name.
 *
 * Note: a stored value is only treated as a hit when it is truthy, so falsy
 * results (`null`, `""`) are recomputed on every call.
 *
 * @param key Human-readable cache key (also used in log output).
 * @param fn Producer invoked on a cache miss.
 * @returns The cached or freshly computed value.
 */
async function cached<T>(
  key: string,
  fn: () => Promise<T>,
): Promise<T> {
  const keyHash = crypto.createHash("sha256").update(key).digest("hex");

  const cachedData = await storage.getItem(keyHash);
  if (cachedData) {
    console.log(`[CACHE] HIT: ${key}`);
    return cachedData as T;
  }

  console.log(`[CACHE] MISS: ${key}`);
  const result = await fn();

  // cast to StorageValue-compatible type to satisfy unstorage typings
  await storage.setItem(keyHash, result as unknown as any);

  return result;
}

/** Shared MusicBrainz client; the contact e-mail comes from the environment. */
export const mbAPI = new MusicBrainzApi({
  appName: "Index's Teal.fm Apple Music Importer",
  appVersion: "0.1",
  appContactInfo: env.MUSICBRAINZ_CONTACT_EMAIL,
});

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

/**
 * Runs `fn`, retrying on any thrown error with exponential backoff.
 *
 * @param fn Operation to attempt.
 * @param retries Number of additional attempts after the first failure.
 * @param delay Wait before the next attempt in milliseconds; doubles each retry.
 * @returns The first successful result of `fn`.
 * @throws The last error once all retries are used up.
 */
async function retry<T>(
  fn: () => Promise<T>,
  retries = 3,
  delay = 1000,
): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    if (retries > 0) {
      console.warn(
        `Function failed, retrying in ${delay}ms... (${retries} retries left)`,
      );
      await sleep(delay);
      return retry(fn, retries - 1, delay * 2); // Exponential backoff
    }
    throw error;
  }
}

/**
 * Separators between multiple artists in a single artist string.
 *
 * Symbol separators (`&`, `,`) may touch the names directly, but word
 * separators (`feat`, `featuring`, `ft`, `with`, `x`) must be surrounded by
 * whitespace. Otherwise names such as "Charli XCX", "Maxwell" or
 * "Within Temptation" would be split in the middle of a word.
 */
const ARTIST_SEPARATOR =
  /\s*[&,]\s*|\s+(?:feat\.?|featuring|ft\.?|with|x)\s+/i;

/** Release-title keywords that mark a release as a variant of the base album. */
const VARIANT_KEYWORDS: readonly string[] = [
  "track by track",
  "commentary",
  "bonus",
  "deluxe",
  "expanded",
  "remix",
  "edition",
  "remastered",
  "clean",
  "instrumental",
  "edited",
];

/**
 * Builds the Lucene query sent to the MusicBrainz recording search.
 *
 * @returns The query string, or `null` when the artist or the track term is
 *   empty (an empty `field:()` group is not a valid query).
 */
function buildRecordingQuery(
  artistName: string,
  trackName: string,
): string | null {
  const artistTerm = escapeSearchQuery(getPrimaryArtist(artistName));
  const trackTerm = escapeSearchQuery(stripFeatures(trackName));
  if (!artistTerm || !trackTerm) return null;

  return `artist:(${artistTerm}) AND recording:(${trackTerm})`;
}

/**
 * Finds the MusicBrainz recording and release that best match an Apple Music
 * track. Results are cached per artist/album/track triple.
 *
 * Matching rules:
 * - the recording's artist credit must match `artistName` (see `matchArtist`);
 * - the recording title must equal `trackName` after normalization;
 * - only releases with status "Official" are considered;
 * - a release whose normalized title equals the normalized album name and
 *   contains no variant keyword is returned immediately;
 * - otherwise releases are scored (+2 title starts with the album name,
 *   -3 variant keyword, +1 has a date, +2 "explicit", -2 "clean") and the
 *   highest score wins, preferring an "explicit" release on ties.
 *
 * @param artistName Artist string as exported by Apple Music.
 * @param albumName Album title as exported by Apple Music.
 * @param trackName Track title as exported by Apple Music.
 * @returns The chosen recording/release pair, or `null` when nothing matches.
 */
export async function lookupTrack(
  artistName: string,
  albumName: string,
  trackName: string,
): Promise<{ recording: IRecordingMatch; release: IRelease } | null> {
  const cacheKey = `lookupTrack:${artistName}:${albumName}:${trackName}`;

  return cached(cacheKey, async () => {
    const searchQuery = buildRecordingQuery(artistName, trackName);
    if (searchQuery === null) {
      console.log("No suitable match found.");
      return null;
    }

    // Log the query that is actually sent so failed runs can be reproduced.
    console.log(searchQuery);

    const query = await retry(() =>
      mbAPI.search("recording", {
        query: searchQuery,
        limit: 25,
      })
    );

    const candidates: Array<{
      recording: any;
      release: any;
      score: number;
    }> = [];
    const albumNorm = normalizeKey(albumName);
    const trackKey = normalizeKey(stripFeatures(trackName));

    for (const recording of query.recordings ?? []) {
      const artists = recording["artist-credit"];
      if (!artists || !matchArtist(artistName, artists)) {
        console.log("Artist doesn't match", recording.id);
        continue;
      }

      console.log(recording.title);

      if (normalizeKey(stripFeatures(recording.title)) !== trackKey) {
        console.log("Not same title", recording.id);
        continue;
      }

      for (const release of recording.releases ?? []) {
        if (release.status !== "Official") {
          console.log("Release not official", release.id);
          continue;
        }

        // Variant detection runs on the raw title because normalizeKey
        // strips most of these keywords.
        const releaseTitleRaw = (release.title || "").toLowerCase();
        const hasVariant = VARIANT_KEYWORDS.some((keyword) =>
          releaseTitleRaw.includes(keyword)
        );

        const releaseNorm = normalizeKey(release.title);

        // --- Perfect match (only if no variant) ---
        if (releaseNorm === albumNorm && !hasVariant) {
          console.log("Perfect match:", release.title);
          return { recording, release };
        }

        let score = 0;

        // Base scoring for partial match
        if (releaseNorm.startsWith(albumNorm)) score += 2;

        // Penalize variants
        if (hasVariant) score -= 3;

        // Bonus for release date
        if (release.date) score += 1;

        // Explicit vs Clean bias
        const variantText = [
          release.title,
          release.disambiguation,
          recording.title,
          recording.disambiguation,
        ]
          .filter(Boolean)
          .join(" ")
          .toLowerCase();

        if (variantText.includes("explicit")) score += 2;
        if (variantText.includes("clean")) score -= 2;

        // Log candidate score
        console.log(
          `${release.title} [${release.id}] | norm="${releaseNorm}" | variant=${hasVariant} | score=${score}`,
        );

        candidates.push({ recording, release, score });
      }
    }

    // --- Fallback: pick best candidate ---
    if (candidates.length > 0) {
      candidates.sort((a, b) => b.score - a.score);
      const topScore = candidates[0]!.score;
      const topEqual = candidates.filter((c) => c.score === topScore);

      // Tie-breaker: prefer explicit
      const explicit = topEqual.find((c) =>
        (
          c.release.title +
          c.recording.title +
          (c.release.disambiguation ?? "")
        )
          .toLowerCase()
          .includes("explicit")
      );
      const best = explicit ?? topEqual[0]!;

      console.log(
        `Best fallback match: ${best.release.title} (score ${best.score})`,
      );
      return best;
    }

    console.log("No suitable match found.");
    return null;
  });
}

/**
 * Looks up the Apple Music (US storefront) song URL attached to a
 * MusicBrainz recording through its URL relationships.
 *
 * @param recording Recording whose `id` is looked up.
 * @returns The URL, `""` when the recording has no relations at all, or
 *   `undefined` when no matching streaming relation exists.
 */
export async function getAppleMusicURL(
  recording: IRecording,
): Promise<string | undefined> {
  const cacheKey = `getAppleMusicURL:${recording.id}`;

  return cached(cacheKey, async () => {
    const query = await retry(() =>
      mbAPI.lookup("recording", recording.id, ["url-rels"])
    );

    const relations = query.relations;
    if (!relations) return "";

    const appleMusic = relations.find((relation) =>
      relation.type === "streaming" &&
      relation.url?.resource.includes("music.apple.com/us/song/")
    );

    return appleMusic?.url?.resource;
  });
}

/**
 * Compares the input artist with one candidate name (artist or alias).
 *
 * Normalized keys match when they are equal or one contains the other. When
 * normalization erases either name completely (e.g. "The The", "!!!") the
 * substring test would be true for everything, so the raw names are compared
 * case-insensitively instead.
 */
function artistNamesMatch(
  inputRaw: string,
  inputKey: string,
  candidateRaw: string,
): boolean {
  const candidateKey = normalizeKey(candidateRaw);
  if (inputKey === "" || candidateKey === "") {
    return inputRaw.trim().toLowerCase() === candidateRaw.trim().toLowerCase();
  }
  return inputKey.includes(candidateKey) || candidateKey.includes(inputKey);
}

/**
 * Decides whether an Apple Music artist string refers to the artists in a
 * MusicBrainz artist credit.
 *
 * A single credited artist (or one of its aliases) matching the whole input
 * is enough. Failing that, the input is split on artist separators and every
 * part must match some credited artist.
 *
 * @param artistInput Artist string as exported by Apple Music.
 * @param artistCredits MusicBrainz `artist-credit` entries; each entry is
 *   either a credit wrapping an `artist` object or the artist object itself.
 * @returns `true` when the credit is considered the same artist(s).
 */
function matchArtist(artistInput: string, artistCredits: any[]): boolean {
  const artistInputNorm = normalizeKey(artistInput);

  const creditedArtists = artistCredits
    .map((credit) => credit?.artist ?? credit)
    .filter((artist) => typeof artist?.name === "string" && artist.name !== "");

  for (const mbArtist of creditedArtists) {
    // Exact or substring match (handles artist name variations)
    if (artistNamesMatch(artistInput, artistInputNorm, mbArtist.name)) {
      return true;
    }

    // Check aliases. The JSON web service exposes them as `aliases[].name`;
    // the older `alias-list[].alias` shape is still accepted.
    const aliases = mbArtist.aliases ?? mbArtist["alias-list"] ?? [];
    for (const alias of aliases) {
      const aliasName = alias?.name ?? alias?.alias;
      if (
        typeof aliasName === "string" &&
        artistNamesMatch(artistInput, artistInputNorm, aliasName)
      ) {
        return true;
      }
    }
  }

  // Multi-artist logic: every input artist must appear in the credit.
  // Empty keys are dropped because `includes("")` is always true.
  const inputArtists = artistInput
    .split(ARTIST_SEPARATOR)
    .map((part) => normalizeKey(part))
    .filter(Boolean);

  if (inputArtists.length > 1) {
    const creditNames = creditedArtists
      .map((artist) => normalizeKey(artist.name))
      .filter(Boolean);

    const allMatch = inputArtists.every((inputArtist) =>
      creditNames.some((creditName) =>
        creditName.includes(inputArtist) ||
        inputArtist.includes(creditName)
      )
    );

    if (allMatch) return true;
  }

  console.log(
    "Not same artist:",
    artistCredits.map((c) => normalizeKey((c?.artist ?? c)?.name)).join(
      ", ",
    ),
    "!=",
    artistInputNorm,
  );
  return false;
}

/**
 * Phrase replacements applied by `normalizeKey`, in order. Order matters:
 * later rules see the output of earlier ones (for example "soundtrack" first
 * becomes "ost", which a later rule removes).
 */
const KEY_REPLACEMENTS: ReadonlyArray<readonly [RegExp, string]> = [
  [/\boriginal motion picture soundtrack\b/gi, "ost"],
  [/\boriginal soundtrack\b/gi, "ost"],
  [/\bsoundtrack\b/gi, "ost"],
  [/\bvol(\.|ume)?\b/gi, "vol"],
  [/\bpart\b/gi, "pt"],
  [/\bparts\b/gi, "pt"],
  [/\bedition\b/gi, ""],
  [/\bthe\b/gi, ""],
  [/\band\b/gi, ""],
  [/\bep\b/gi, ""],
  [/\bwalt disney records\b/gi, ""],
  [/\blegacy collection\b/gi, ""],
  [/\bgreatest hits\b/gi, ""],
  [/\breissue(d)?\b/gi, ""],
  [/\bre-issue(d)?\b/gi, ""],
  [/\bsong of the\b/gi, ""],
  [/\bost\b/gi, ""],
  [/\bdeluxe\b/gi, ""],
  [/\btrack by track\b/gi, ""],
  [/\bcommentary\b/gi, ""],
  [/\bversion\b/gi, ""],
];

/** Whole-word tokens that `normalizeKey` rewrites to digits. */
const NUMBER_WORDS: ReadonlyArray<readonly [string, string]> = [
  // Roman numerals → numbers
  ["xx", "20"],
  ["xix", "19"],
  ["xviii", "18"],
  ["xvii", "17"],
  ["xvi", "16"],
  ["xv", "15"],
  ["xiv", "14"],
  ["xiii", "13"],
  ["xii", "12"],
  ["xi", "11"],
  ["x", "10"],
  ["ix", "9"],
  ["viii", "8"],
  ["vii", "7"],
  ["vi", "6"],
  ["v", "5"],
  ["iv", "4"],
  ["iii", "3"],
  ["ii", "2"],
  ["i", "1"],
  // Words → numbers
  ["one", "1"],
  ["two", "2"],
  ["three", "3"],
  ["four", "4"],
  ["five", "5"],
  ["six", "6"],
  ["seven", "7"],
  ["eight", "8"],
  ["nine", "9"],
  ["ten", "10"],
  ["eleven", "11"],
  ["twelve", "12"],
  ["thirteen", "13"],
  ["fourteen", "14"],
  ["fifteen", "15"],
  ["sixteen", "16"],
  ["seventeen", "17"],
  ["eighteen", "18"],
  ["nineteen", "19"],
  ["twenty", "20"],
];

/** `NUMBER_WORDS` compiled once instead of on every `normalizeKey` call. */
const NUMBER_WORD_PATTERNS: ReadonlyArray<readonly [RegExp, string]> =
  NUMBER_WORDS.map(
    ([word, digits]) => [new RegExp(`\\b${word}\\b`, "gi"), digits] as const,
  );

/**
 * Reduces a title or artist name to a compact comparison key.
 *
 * Steps: Unicode NFKD + lowercase, strip accents, unify curly quotes, apply
 * `KEY_REPLACEMENTS`, drop parenthesised/bracketed text and trailing
 * " - Remastered"-style suffixes, turn roman numerals and number words into
 * digits, and finally remove punctuation and all whitespace.
 *
 * Letters and digits of every script are kept, so non-Latin titles do not
 * all collapse to the same empty key.
 *
 * @param s Text to normalize; `undefined` or empty yields `""`.
 * @returns The comparison key (possibly empty).
 */
function normalizeKey(s?: string): string {
  if (!s) return "";

  let key = s.normalize("NFKD").toLowerCase().trim();
  key = key.normalize("NFKD").replace(/[\u0300-\u036f]/g, ""); // remove accents
  key = key.replace(/[’‘]/g, "'").replace(/[“”]/g, '"');

  for (const [pattern, replacement] of KEY_REPLACEMENTS) {
    key = key.replace(pattern, replacement);
  }

  // Remove parenthetical/extra info
  key = key.replace(/\(.*?\)/g, "");
  key = key.replace(/\[.*?\]/g, "");
  key = key.replace(
    /[-–:]\s*(remaster(ed)?|ep|single|deluxe|expanded|anniversary|commentary|track by track|bonus|edition|version).*$/gi,
    "",
  );
  key = key.replace(
    /\s*-\s*(cover|live|remaster|remix|version|edit|single|mono|stereo|mix|feat.*)$/gi,
    "",
  );

  for (const [pattern, digits] of NUMBER_WORD_PATTERNS) {
    key = key.replace(pattern, digits);
  }

  // Strip punctuation but keep letters, marks and digits of any script
  // (`\w` alone is ASCII-only and would erase e.g. Japanese titles).
  key = key.replace(/[^\p{L}\p{M}\p{N}_\s]/gu, "");
  key = key.replace(/\s+/g, "");

  return key;
}

/** Removes "(feat. …)" / "[feat. …]" groups from a track title. */
function stripFeatures(title: string): string {
  return title.replace(/\s*[\(\[]feat\.?.*?[\)\]]/gi, "").trim();
}

/**
 * Removes apostrophes, which can cause search issues, and trims the string.
 * Not called by `lookupTrack`, which escapes its terms with
 * `escapeSearchQuery` instead.
 */
function normalizeForSearch(str: string): string {
  // Remove apostrophes and other punctuation that causes search issues
  return str.replace(/['']/g, "").trim();
}

/**
 * Escapes free text so it can be embedded in a Lucene query term.
 *
 * Every character with a meaning in the Lucene query syntax
 * (`\ + - ! ( ) { } [ ] ^ " ~ * ? : | & /`) is prefixed with a backslash.
 * Stand-alone upper-case `AND` / `OR` / `NOT` are lower-cased so they are
 * treated as ordinary words rather than boolean operators.
 *
 * @param str Raw artist or track text.
 * @returns Text safe to place inside `field:( … )`.
 */
function escapeSearchQuery(str: string): string {
  return str
    .replace(/[\\+\-!(){}\[\]^"~*?:|&\/]/g, "\\$&")
    .replace(/\b(?:AND|OR|NOT)\b/g, (operator) => operator.toLowerCase());
}

/**
 * Returns the first artist of a possibly multi-artist string
 * (e.g. "A feat. B" → "A"). Falls back to the whole trimmed string when the
 * part before the first separator is empty.
 */
function getPrimaryArtist(artistName: string): string {
  const [first = ""] = artistName.split(ARTIST_SEPARATOR);
  return first.trim() || artistName.trim();
}