···
148
-
help="Path to output links file (default: links.json in git store)",
150
-
mapping_file: Optional[Path] = typer.Option(
154
-
help="Path to output URL <-> atom ID mapping file (default: url_mapping.json in git store)",
148
+
help="Path to output unified links file (default: links.json in git store)",
verbose: bool = typer.Option(
···
This command analyzes all blog entries to extract outbound links,
resolve them properly with respect to the feed's base URL, and
categorize them as internal, user, or unknown links.
163
+
Creates a unified links.json file containing all link data.
···
if verbose and entry_links:
console.print(f" Found {len(entry_links)} links in {username}:{entry.title[:50]}...")
293
-
# Determine output paths
289
+
# Determine output path
output_path = output_file
output_path = config.git_store / "links.json"
300
-
mapping_path = mapping_file
302
-
mapping_path = config.git_store / "url_mapping.json"
# Save all extracted links (not just filtered ones)
···
if hasattr(link, 'href') and link.href:
registered_urls.add(str(link.href))
327
-
# Create filtered version for URL mapping (only links to registered posts)
328
-
filtered_link_dict = {}
329
-
filtered_reverse_dict = {}
318
+
# Build unified structure with metadata
320
+
reverse_mapping = {}
for url, entry_ids in link_dict.items():
332
-
if url in registered_urls:
333
-
filtered_link_dict[url] = entry_ids
335
-
# Also update reverse mapping
336
-
for entry_id in entry_ids:
337
-
if entry_id not in filtered_reverse_dict:
338
-
filtered_reverse_dict[entry_id] = []
339
-
if url not in filtered_reverse_dict[entry_id]:
340
-
filtered_reverse_dict[entry_id].append(url)
323
+
is_tracked = url in registered_urls
324
+
target_username = None
326
+
# Find target username if this is a tracked post
328
+
for username in users:
329
+
user_domains_set = {domain for domain in user_domains.get(username, [])}
330
+
if any(domain in url for domain in user_domains_set):
331
+
target_username = username
334
+
unified_links[url] = {
335
+
"referencing_entries": entry_ids,
336
+
"is_tracked_post": is_tracked
339
+
if target_username:
340
+
unified_links[url]["target_username"] = target_username
342
+
# Build reverse mapping
343
+
for entry_id in entry_ids:
344
+
if entry_id not in reverse_mapping:
345
+
reverse_mapping[entry_id] = []
346
+
if url not in reverse_mapping[entry_id]:
347
+
reverse_mapping[entry_id].append(url)
342
-
# Use all links for main output, not filtered ones
343
-
output_data = link_dict
349
+
# Create unified output data
351
+
"links": unified_links,
352
+
"reverse_mapping": reverse_mapping,
353
+
"user_domains": {k: list(v) for k, v in user_domains.items()}
console.print(f"Found {len(registered_urls)} registered post URLs")
347
-
console.print(f"Found {len(link_dict)} total links, {len(filtered_link_dict)} links to registered posts")
358
+
console.print(f"Found {len(link_dict)} total links, {sum(1 for link in unified_links.values() if link['is_tracked_post'])} tracked posts")
349
-
# Save links data (URL -> atom ID mapping, all links)
360
+
# Save unified data
with open(output_path, "w") as f:
json.dump(output_data, f, indent=2, default=str)
353
-
# Save bidirectional mapping file (filtered)
355
-
"url_to_atom": filtered_link_dict,
356
-
"atom_to_urls": filtered_reverse_dict
359
-
with open(mapping_path, "w") as f:
360
-
json.dump(mapping_data, f, indent=2, default=str)
···
print(f"User\t{len(link_categories['user'])}\tLinks to other tracked users")
print(f"Unknown\t{len(link_categories['unknown'])}\tLinks to external sites")
print(f"Total Extracted\t{len(all_links)}\tAll extracted links")
373
-
print(f"Saved to Output\t{len(output_data)}\tLinks saved to output file")
374
-
print(f"Cross-references\t{len(filtered_link_dict)}\tLinks to registered posts only")
375
+
print(f"Saved to Output\t{len(output_data['links'])}\tLinks saved to output file")
376
+
print(f"Cross-references\t{sum(1 for link in unified_links.values() if link['is_tracked_post'])}\tLinks to registered posts only")
table = Table(title="Links Summary")
table.add_column("Category", style="cyan")
···
table.add_row("User", str(len(link_categories["user"])), "Links to other tracked users")
table.add_row("Unknown", str(len(link_categories["unknown"])), "Links to external sites")
table.add_row("Total Extracted", str(len(all_links)), "All extracted links")
385
-
table.add_row("Saved to Output", str(len(output_data)), "Links saved to output file")
386
-
table.add_row("Cross-references", str(len(filtered_link_dict)), "Links to registered posts only")
387
+
table.add_row("Saved to Output", str(len(output_data['links'])), "Links saved to output file")
388
+
table.add_row("Cross-references", str(sum(1 for link in unified_links.values() if link['is_tracked_post'])), "Links to registered posts only")
···
console.print(f" {link_pair}: {count} links")
415
-
console.print(f"\nLinks output saved to: {output_path}")
416
-
console.print(f"URL mapping saved to: {mapping_path}")
417
+
console.print(f"\nUnified links data saved to: {output_path}")
console.print(f"[red]Error extracting links: {e}[/red]")