Manage Atom feeds in a persistent git repository

Unify links, references, and url_mapping JSON files into single structure

- Consolidate references.json, url_mapping.json, and links.json into unified links.json
- Eliminate duplication by storing each URL only once with metadata flags
- Add is_tracked_post flag and target_username to distinguish internal/external links
- Update all commands (links, index, threads, info) to use unified structure
- Maintain all existing functionality while reducing file I/O and complexity
- Update documentation and help text to reflect changes

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Changed files
+127 -86
src
thicket
+28 -25
ARCH.md
···
git-store/
├── index.json # User directory index
├── duplicates.json # Manual curation of duplicate entries
-
├── links.json # All outbound links categorized by type
-
├── references.json # Cross-reference index for threading
+
├── links.json # Unified links, references, and mapping data
├── user1/
│ ├── entry_id_1.json # Sanitized entry files
│ ├── entry_id_2.json
···
### Data Structures
-
#### links.json Format
+
#### links.json Format (Unified Structure)
```json
{
-
"links": [
-
{
-
"url": "https://example.com/post/123",
-
"entry_id": "https://blog.user.com/entry/456",
-
"username": "user1",
-
"context": "As mentioned in this post...",
-
"category": "user",
+
"links": {
+
"https://example.com/post/123": {
+
"referencing_entries": ["https://blog.user.com/entry/456"],
+
"is_tracked_post": true,
"target_username": "user2"
+
},
+
"https://external-site.com/article": {
+
"referencing_entries": ["https://blog.user.com/entry/789"],
+
"is_tracked_post": false
}
-
],
-
"categories": {
-
"internal": 1234,
-
"user": 456,
-
"unknown": 7890
},
-
"user_domains": {
-
"user1": ["blog.user.com", "user.com"],
-
"user2": ["example.com"]
-
}
-
}
-
```
-
-
#### references.json Format
-
```json
-
{
+
"reverse_mapping": {
+
"https://blog.user.com/entry/456": ["https://example.com/post/123"],
+
"https://blog.user.com/entry/789": ["https://external-site.com/article"]
+
},
"references": [
{
"source_entry_id": "https://blog.user.com/entry/456",
···
}
}
```
+
+
This unified structure eliminates duplication by:
+
- Storing each URL only once with metadata flags
+
- Including all link data, reference data, and mappings in one file
+
- Using `is_tracked_post` to identify internal vs external links
+
- Providing bidirectional mappings for efficient queries
+
+
### Unified Structure Benefits
+
+
- **Eliminates Duplication**: Each URL appears only once with metadata
+
- **Single Source of Truth**: All link-related data in one file
+
- **Efficient Queries**: Fast lookups for both directions (URL→entries, entry→URLs)
+
- **Atomic Updates**: All link data changes together
+
- **Reduced I/O**: Fewer file operations
### Implementation Benefits
+42 -11
src/thicket/cli/commands/index_cmd.py
···
None,
"--output",
"-o",
-
help="Path to output index file (default: references.json in git store)",
+
help="Path to output index file (default: updates links.json in git store)",
),
verbose: bool = typer.Option(
False,
···
This command analyzes all blog entries to detect cross-references between
different blogs, creating an index that can be used to build threaded
views of related content.
+
+
Updates the unified links.json file with reference data.
"""
try:
# Load configuration
···
if output_file:
output_path = output_file
else:
-
output_path = config.git_store / "references.json"
+
output_path = config.git_store / "links.json"
+
+
# Load existing links data or create new structure
+
if output_path.exists() and not output_file:
+
# Load existing unified structure
+
with open(output_path) as f:
+
existing_data = json.load(f)
+
else:
+
# Create new structure
+
existing_data = {
+
"links": {},
+
"reverse_mapping": {},
+
"user_domains": {}
+
}
+
+
# Update with reference data
+
existing_data["references"] = ref_index.to_dict()["references"]
+
existing_data["user_domains"] = {k: list(v) for k, v in user_domains.items()}
-
# Save reference index
+
# Save updated structure
with open(output_path, "w") as f:
-
json.dump(ref_index.to_dict(), f, indent=2, default=str)
+
json.dump(existing_data, f, indent=2, default=str)
# Show summary
if not get_tsv_mode():
···
None,
"--index",
"-i",
-
help="Path to reference index file (default: references.json in git store)",
+
help="Path to reference index file (default: links.json in git store)",
),
username: Optional[str] = typer.Option(
None,
···
This command uses the reference index to show which blog entries
are connected through cross-references, creating an email-style
threaded view of the conversation.
+
+
Reads reference data from the unified links.json file.
"""
try:
# Load configuration
···
if index_file:
index_path = index_file
else:
-
index_path = config.git_store / "references.json"
+
index_path = config.git_store / "links.json"
if not index_path.exists():
-
console.print(f"[red]Reference index not found: {index_path}[/red]")
-
console.print("Run 'thicket index' first to build the reference index")
+
console.print(f"[red]Links file not found: {index_path}[/red]")
+
console.print("Run 'thicket links' and 'thicket index' first to build the reference index")
raise typer.Exit(1)
-
# Load reference index
+
# Load unified data
with open(index_path) as f:
-
index_data = json.load(f)
+
unified_data = json.load(f)
+
+
# Check if references exist in the unified structure
+
if "references" not in unified_data:
+
console.print(f"[red]No references found in {index_path}[/red]")
+
console.print("Run 'thicket index' first to build the reference index")
+
raise typer.Exit(1)
-
ref_index = ReferenceIndex.from_dict(index_data)
+
# Extract reference data and reconstruct ReferenceIndex
+
ref_index = ReferenceIndex.from_dict({
+
"references": unified_data["references"],
+
"user_domains": unified_data.get("user_domains", {})
+
})
# Initialize Git store to get entry details
git_store = GitStore(config.git_store)
+12 -6
src/thicket/cli/commands/info_cmd.py
···
raise typer.Exit(1)
# Load reference index if available
-
references_path = config.git_store / "references.json"
+
links_path = config.git_store / "links.json"
ref_index = None
-
if references_path.exists():
-
with open(references_path) as f:
-
index_data = json.load(f)
-
ref_index = ReferenceIndex.from_dict(index_data)
+
if links_path.exists():
+
with open(links_path) as f:
+
unified_data = json.load(f)
+
+
# Check if references exist in the unified structure
+
if "references" in unified_data:
+
ref_index = ReferenceIndex.from_dict({
+
"references": unified_data["references"],
+
"user_domains": unified_data.get("user_domains", {})
+
})
# Display information
if get_tsv_mode():
···
if ref_index:
_display_link_info(entry, found_username, ref_index)
else:
-
console.print("\n[yellow]No reference index found. Run 'thicket index' to build cross-reference data.[/yellow]")
+
console.print("\n[yellow]No reference index found. Run 'thicket links' and 'thicket index' to build cross-reference data.[/yellow]")
# Optionally display content
if show_content and entry.content: