···
1
+
This file is a merged representation of the entire codebase, combined into a single document by Repomix.
4
+
This section contains a summary of this file.
7
+
This file contains a packed representation of the entire repository's contents.
8
+
It is designed to be easily consumable by AI systems for analysis, code review,
9
+
or other automated processes.
13
+
The content is organized as follows:
14
+
1. This summary section
15
+
2. Repository information
16
+
3. Directory structure
17
+
4. Repository files (if enabled)
18
+
5. Multiple file entries, each consisting of:
19
+
- File path as an attribute
20
+
- Full contents of the file
24
+
- This file should be treated as read-only. Any changes should be made to the
25
+
original repository files, not this packed version.
26
+
- When processing this file, use the file path to distinguish
27
+
between different files in the repository.
28
+
- Be aware that this file may contain sensitive information. Handle it with
29
+
the same level of security as you would the original repository.
33
+
- Some files may have been excluded based on .gitignore rules and Repomix's configuration
34
+
- Binary files are not included in this packed representation. Please refer to the Repository Structure section for a complete list of file paths, including binary files
35
+
- Files matching patterns in .gitignore are excluded
36
+
- Files matching default ignore patterns are excluded
37
+
- Files are sorted by Git change count (files with more changes are at the bottom)
42
+
<directory_structure>
89
+
</directory_structure>
92
+
This section contains the contents of the repository's files.
94
+
<file path=".claude/settings.local.json">
106
+
"enableAllProjectMcpServers": false
110
+
<file path="src/thicket/cli/commands/generate.py">
111
+
"""Generate static HTML website from thicket data."""
117
+
from datetime import datetime
118
+
from pathlib import Path
119
+
from typing import Any, Optional, TypedDict, Union
122
+
from jinja2 import Environment, FileSystemLoader, select_autoescape
123
+
from rich.progress import Progress, SpinnerColumn, TextColumn
125
+
from ...core.git_store import GitStore
126
+
from ...models.feed import AtomEntry
127
+
from ...models.user import GitStoreIndex, UserMetadata
128
+
from ..main import app
129
+
from ..utils import console, load_config
132
+
class UserData(TypedDict):
133
+
"""Type definition for user data structure."""
135
+
metadata: UserMetadata
136
+
recent_entries: list[tuple[str, AtomEntry]]
139
+
def safe_anchor_id(atom_id: str) -> str:
140
+
"""Convert an Atom ID to a safe HTML anchor ID."""
141
+
# Use base64 URL-safe encoding without padding
142
+
encoded = base64.urlsafe_b64encode(atom_id.encode('utf-8')).decode('ascii').rstrip('=')
143
+
# Prefix with 'id' to ensure it starts with a letter (HTML requirement)
144
+
return f"id{encoded}"
147
+
class WebsiteGenerator:
148
+
"""Generate static HTML website from thicket data."""
150
+
def __init__(self, git_store: GitStore, output_dir: Path):
151
+
self.git_store = git_store
152
+
self.output_dir = output_dir
153
+
self.template_dir = Path(__file__).parent.parent.parent / "templates"
155
+
# Initialize Jinja2 environment
156
+
self.env = Environment(
157
+
loader=FileSystemLoader(self.template_dir),
158
+
autoescape=select_autoescape(["html", "xml"]),
162
+
self.index: Optional[GitStoreIndex] = None
163
+
self.entries: list[tuple[str, AtomEntry]] = [] # (username, entry)
164
+
self.links_data: Optional[dict[str, Any]] = None
165
+
self.threads: list[list[dict[str, Any]]] = [] # List of threads with metadata
167
+
def get_display_name(self, username: str) -> str:
168
+
"""Get display name for a user, falling back to username."""
169
+
if self.index and username in self.index.users:
170
+
user = self.index.users[username]
171
+
return user.display_name or username
174
+
def get_user_homepage(self, username: str) -> Optional[str]:
175
+
"""Get homepage URL for a user."""
176
+
if self.index and username in self.index.users:
177
+
user = self.index.users[username]
178
+
return str(user.homepage) if user.homepage else None
181
+
def clean_html_summary(self, content: Optional[str], max_length: int = 200) -> str:
182
+
"""Clean HTML content and truncate for display in timeline."""
187
+
clean_text = re.sub(r"<[^>]+>", " ", content)
188
+
# Replace multiple whitespace with single space
189
+
clean_text = re.sub(r"\s+", " ", clean_text)
190
+
# Strip leading/trailing whitespace
191
+
clean_text = clean_text.strip()
193
+
# Truncate with ellipsis if needed
194
+
if len(clean_text) > max_length:
195
+
# Try to break at word boundary
196
+
truncated = clean_text[:max_length]
197
+
last_space = truncated.rfind(" ")
199
+
last_space > max_length * 0.8
200
+
): # If we can break reasonably close to the limit
201
+
clean_text = truncated[:last_space] + "..."
203
+
clean_text = truncated + "..."
207
+
def load_data(self) -> None:
208
+
"""Load all data from the git repository."""
211
+
TextColumn("[progress.description]{task.description}"),
215
+
task = progress.add_task("Loading repository index...", total=None)
216
+
self.index = self.git_store._load_index()
218
+
raise ValueError("No index found in repository")
219
+
progress.update(task, completed=True)
222
+
task = progress.add_task("Loading entries...", total=None)
223
+
for username, user_metadata in self.index.users.items():
224
+
user_dir = self.git_store.repo_path / user_metadata.directory
225
+
if user_dir.exists():
226
+
for entry_file in user_dir.glob("*.json"):
227
+
if entry_file.name not in ["index.json", "duplicates.json"]:
229
+
with open(entry_file) as f:
230
+
entry_data = json.load(f)
231
+
entry = AtomEntry(**entry_data)
232
+
self.entries.append((username, entry))
233
+
except Exception as e:
235
+
f"[yellow]Warning: Failed to load {entry_file}: {e}[/yellow]"
237
+
progress.update(task, completed=True)
239
+
# Sort entries by date (newest first) - prioritize updated over published
241
+
key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True
245
+
task = progress.add_task("Loading links and references...", total=None)
246
+
links_file = self.git_store.repo_path / "links.json"
247
+
if links_file.exists():
248
+
with open(links_file) as f:
249
+
self.links_data = json.load(f)
250
+
progress.update(task, completed=True)
252
+
def build_threads(self) -> None:
253
+
"""Build threaded conversations from references."""
254
+
if not self.links_data or "references" not in self.links_data:
257
+
# Map entry IDs to (username, entry) tuples
258
+
entry_map: dict[str, tuple[str, AtomEntry]] = {}
259
+
for username, entry in self.entries:
260
+
entry_map[entry.id] = (username, entry)
262
+
# Build adjacency lists for references
263
+
self.outbound_refs: dict[str, set[str]] = {}
264
+
self.inbound_refs: dict[str, set[str]] = {}
265
+
self.reference_details: dict[
266
+
str, list[dict[str, Any]]
267
+
] = {} # Store full reference info
269
+
for ref in self.links_data["references"]:
270
+
source_id = ref["source_entry_id"]
271
+
target_id = ref.get("target_entry_id")
273
+
if target_id and source_id in entry_map and target_id in entry_map:
274
+
self.outbound_refs.setdefault(source_id, set()).add(target_id)
275
+
self.inbound_refs.setdefault(target_id, set()).add(source_id)
277
+
# Store reference details for UI
278
+
self.reference_details.setdefault(source_id, []).append(
280
+
"target_id": target_id,
281
+
"target_username": ref.get("target_username"),
282
+
"type": "outbound",
285
+
self.reference_details.setdefault(target_id, []).append(
287
+
"source_id": source_id,
288
+
"source_username": ref.get("source_username"),
293
+
# Find conversation threads (multi-post discussions)
296
+
for entry_id, (_username, _entry) in entry_map.items():
297
+
if entry_id in processed:
300
+
# Build thread starting from this entry
302
+
to_visit = [entry_id]
304
+
level_map: dict[str, int] = {} # Track levels for this thread
306
+
# First, traverse up to find the root
308
+
while current in self.inbound_refs:
309
+
parents = self.inbound_refs[current] - {
311
+
} # Exclude self-references
314
+
# Take the first parent
315
+
parent = next(iter(parents))
316
+
if parent in thread_ids: # Avoid cycles
319
+
to_visit.insert(0, current)
321
+
# Now traverse down from the root
323
+
current = to_visit.pop(0)
324
+
if current in thread_ids or current not in entry_map:
327
+
thread_ids.add(current)
328
+
username, entry = entry_map[current]
330
+
# Calculate thread level
331
+
thread_level = self._calculate_thread_level(current, level_map)
333
+
# Add threading metadata
335
+
"username": username,
336
+
"display_name": self.get_display_name(username),
338
+
"entry_id": current,
339
+
"references_to": list(self.outbound_refs.get(current, [])),
340
+
"referenced_by": list(self.inbound_refs.get(current, [])),
341
+
"thread_level": thread_level,
343
+
thread.append(thread_entry)
344
+
processed.add(current)
347
+
if current in self.outbound_refs:
348
+
children = self.outbound_refs[current] - thread_ids # Avoid cycles
349
+
to_visit.extend(sorted(children))
351
+
if len(thread) > 1: # Only keep actual threads
352
+
# Sort thread by date (newest first) - prioritize updated over published
353
+
thread.sort(key=lambda x: x["entry"].updated or x["entry"].published or datetime.min, reverse=True) # type: ignore
354
+
self.threads.append(thread)
356
+
# Sort threads by the date of their most recent entry - prioritize updated over published
359
+
item["entry"].updated or item["entry"].published or datetime.min for item in t
364
+
def _calculate_thread_level(
365
+
self, entry_id: str, processed_entries: dict[str, int]
367
+
"""Calculate indentation level for threaded display."""
368
+
if entry_id in processed_entries:
369
+
return processed_entries[entry_id]
371
+
if entry_id not in self.inbound_refs:
372
+
processed_entries[entry_id] = 0
375
+
parents_in_thread = self.inbound_refs[entry_id] & set(processed_entries.keys())
376
+
if not parents_in_thread:
377
+
processed_entries[entry_id] = 0
380
+
# Find the deepest parent level + 1
381
+
max_parent_level = 0
382
+
for parent_id in parents_in_thread:
383
+
parent_level = self._calculate_thread_level(parent_id, processed_entries)
384
+
max_parent_level = max(max_parent_level, parent_level)
386
+
level = min(max_parent_level + 1, 4) # Cap at level 4
387
+
processed_entries[entry_id] = level
390
+
def get_standalone_references(self) -> list[dict[str, Any]]:
391
+
"""Get posts that have references but aren't part of multi-post threads."""
392
+
if not hasattr(self, "reference_details"):
395
+
threaded_entry_ids = set()
396
+
for thread in self.threads:
397
+
for item in thread:
398
+
threaded_entry_ids.add(item["entry_id"])
400
+
standalone_refs = []
401
+
for username, entry in self.entries:
403
+
entry.id in self.reference_details
404
+
and entry.id not in threaded_entry_ids
406
+
refs = self.reference_details[entry.id]
407
+
# Only include if it has meaningful references (not just self-references)
408
+
meaningful_refs = [
411
+
if r.get("target_id") != entry.id and r.get("source_id") != entry.id
413
+
if meaningful_refs:
414
+
standalone_refs.append(
416
+
"username": username,
417
+
"display_name": self.get_display_name(username),
419
+
"references": meaningful_refs,
423
+
return standalone_refs
425
+
def _add_cross_thread_links(self, timeline_items: list[dict[str, Any]]) -> None:
426
+
"""Add cross-thread linking for entries that appear in multiple threads."""
427
+
# Map entry IDs to their positions in the timeline
428
+
entry_positions: dict[str, list[int]] = {}
429
+
# Map URLs referenced by entries to the entries that reference them
430
+
url_references: dict[str, list[tuple[str, int]]] = {} # url -> [(entry_id, position)]
432
+
# First pass: collect all entry IDs, their positions, and referenced URLs
433
+
for i, item in enumerate(timeline_items):
434
+
if item["type"] == "post":
435
+
entry_id = item["content"]["entry"].id
436
+
entry_positions.setdefault(entry_id, []).append(i)
437
+
# Track URLs this entry references
438
+
if entry_id in self.reference_details:
439
+
for ref in self.reference_details[entry_id]:
440
+
if ref["type"] == "outbound" and "target_id" in ref:
441
+
# Find the target entry's URL if available
442
+
target_entry = self._find_entry_by_id(ref["target_id"])
443
+
if target_entry and target_entry.link:
444
+
url = str(target_entry.link)
445
+
url_references.setdefault(url, []).append((entry_id, i))
446
+
elif item["type"] == "thread":
447
+
for thread_item in item["content"]:
448
+
entry_id = thread_item["entry"].id
449
+
entry_positions.setdefault(entry_id, []).append(i)
450
+
# Track URLs this entry references
451
+
if entry_id in self.reference_details:
452
+
for ref in self.reference_details[entry_id]:
453
+
if ref["type"] == "outbound" and "target_id" in ref:
454
+
target_entry = self._find_entry_by_id(ref["target_id"])
455
+
if target_entry and target_entry.link:
456
+
url = str(target_entry.link)
457
+
url_references.setdefault(url, []).append((entry_id, i))
459
+
# Build cross-thread connections - only for entries that actually appear multiple times
460
+
cross_thread_connections: dict[str, set[int]] = {} # entry_id -> set of timeline positions
462
+
# Add connections ONLY for entries that appear multiple times in the timeline
463
+
for entry_id, positions in entry_positions.items():
464
+
if len(positions) > 1:
465
+
cross_thread_connections[entry_id] = set(positions)
466
+
# Debug: uncomment to see which entries have multiple appearances
467
+
# print(f"Entry {entry_id[:50]}... appears at positions: {positions}")
469
+
# Apply cross-thread links to timeline items
470
+
for entry_id, positions_set in cross_thread_connections.items():
471
+
positions_list = list(positions_set)
472
+
for pos in positions_list:
473
+
item = timeline_items[pos]
474
+
other_positions = sorted([p for p in positions_list if p != pos])
476
+
if item["type"] == "post":
477
+
# Add cross-thread info to individual posts
478
+
item["content"]["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items)
479
+
# Add info about shared references
480
+
item["content"]["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items)
481
+
elif item["type"] == "thread":
482
+
# Add cross-thread info to thread items
483
+
for thread_item in item["content"]:
484
+
if thread_item["entry"].id == entry_id:
485
+
thread_item["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items)
486
+
thread_item["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items)
489
+
def _build_cross_thread_link_data(self, entry_id: str, other_positions: list[int], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
490
+
"""Build detailed cross-thread link data with anchor information."""
491
+
cross_thread_links = []
493
+
for pos in other_positions:
494
+
item = timeline_items[pos]
495
+
if item["type"] == "post":
496
+
# For individual posts
497
+
safe_id = safe_anchor_id(entry_id)
498
+
cross_thread_links.append({
500
+
"anchor_id": f"post-{pos}-{safe_id}",
501
+
"context": "individual post",
502
+
"title": item["content"]["entry"].title
504
+
elif item["type"] == "thread":
505
+
# For thread items, find the specific thread item
506
+
for thread_idx, thread_item in enumerate(item["content"]):
507
+
if thread_item["entry"].id == entry_id:
508
+
safe_id = safe_anchor_id(entry_id)
509
+
cross_thread_links.append({
511
+
"anchor_id": f"post-{pos}-{thread_idx}-{safe_id}",
512
+
"context": f"thread (level {thread_item.get('thread_level', 0)})",
513
+
"title": thread_item["entry"].title
517
+
return cross_thread_links
519
+
def _find_entry_by_id(self, entry_id: str) -> Optional[AtomEntry]:
520
+
"""Find an entry by its ID."""
521
+
for _username, entry in self.entries:
522
+
if entry.id == entry_id:
526
+
def _get_shared_references(self, entry_id: str, positions: Union[set[int], list[int]], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
527
+
"""Get information about shared references between cross-thread entries."""
530
+
# Collect all referenced URLs from entries at these positions
531
+
url_counts: dict[str, int] = {}
532
+
referencing_entries: dict[str, list[str]] = {} # url -> [entry_ids]
534
+
for pos in positions:
535
+
item = timeline_items[pos]
536
+
entries_to_check = []
538
+
if item["type"] == "post":
539
+
entries_to_check.append(item["content"]["entry"])
540
+
elif item["type"] == "thread":
541
+
entries_to_check.extend([ti["entry"] for ti in item["content"]])
543
+
for entry in entries_to_check:
544
+
if entry.id in self.reference_details:
545
+
for ref in self.reference_details[entry.id]:
546
+
if ref["type"] == "outbound" and "target_id" in ref:
547
+
target_entry = self._find_entry_by_id(ref["target_id"])
548
+
if target_entry and target_entry.link:
549
+
url = str(target_entry.link)
550
+
url_counts[url] = url_counts.get(url, 0) + 1
551
+
if url not in referencing_entries:
552
+
referencing_entries[url] = []
553
+
if entry.id not in referencing_entries[url]:
554
+
referencing_entries[url].append(entry.id)
556
+
# Find URLs referenced by multiple entries
557
+
for url, count in url_counts.items():
558
+
if count > 1 and len(referencing_entries[url]) > 1:
559
+
# Get the target entry info
560
+
target_entry = None
561
+
target_username = None
562
+
for ref in (self.links_data or {}).get("references", []):
563
+
if ref.get("target_url") == url:
564
+
target_username = ref.get("target_username")
565
+
if ref.get("target_entry_id"):
566
+
target_entry = self._find_entry_by_id(ref["target_entry_id"])
569
+
shared_refs.append({
572
+
"referencing_entries": referencing_entries[url],
573
+
"target_username": target_username,
574
+
"target_title": target_entry.title if target_entry else None
577
+
return sorted(shared_refs, key=lambda x: x["count"], reverse=True)
579
+
def generate_site(self) -> None:
580
+
"""Generate the static website."""
581
+
# Create output directory
582
+
self.output_dir.mkdir(parents=True, exist_ok=True)
584
+
# Create static directories
585
+
(self.output_dir / "css").mkdir(exist_ok=True)
586
+
(self.output_dir / "js").mkdir(exist_ok=True)
589
+
css_template = self.env.get_template("style.css")
590
+
css_content = css_template.render()
591
+
with open(self.output_dir / "css" / "style.css", "w") as f:
592
+
f.write(css_content)
594
+
# Generate JavaScript
595
+
js_template = self.env.get_template("script.js")
596
+
js_content = js_template.render()
597
+
with open(self.output_dir / "js" / "script.js", "w") as f:
598
+
f.write(js_content)
600
+
# Prepare common template data
602
+
"title": "Energy & Environment Group",
603
+
"generated_at": datetime.now().isoformat(),
604
+
"get_display_name": self.get_display_name,
605
+
"get_user_homepage": self.get_user_homepage,
606
+
"clean_html_summary": self.clean_html_summary,
607
+
"safe_anchor_id": safe_anchor_id,
610
+
# Build unified timeline
611
+
timeline_items = []
613
+
# Only consider the threads that will actually be displayed
614
+
displayed_threads = self.threads[:20] # Limit to 20 threads
616
+
# Track which entries are part of displayed threads
617
+
threaded_entry_ids = set()
618
+
for thread in displayed_threads:
619
+
for item in thread:
620
+
threaded_entry_ids.add(item["entry_id"])
622
+
# Add threads to timeline (using the date of the most recent post)
623
+
for thread in displayed_threads:
624
+
most_recent_date = max(
625
+
item["entry"].updated or item["entry"].published or datetime.min
628
+
timeline_items.append({
630
+
"date": most_recent_date,
634
+
# Add individual posts (not in threads)
635
+
for username, entry in self.entries[:50]:
636
+
if entry.id not in threaded_entry_ids:
637
+
# Check if this entry has references
639
+
entry.id in self.reference_details
640
+
if hasattr(self, "reference_details")
646
+
refs = self.reference_details.get(entry.id, [])
649
+
if r.get("target_id") != entry.id
650
+
and r.get("source_id") != entry.id
653
+
timeline_items.append({
655
+
"date": entry.updated or entry.published or datetime.min,
657
+
"username": username,
658
+
"display_name": self.get_display_name(username),
660
+
"references": refs if refs else None
664
+
# Sort unified timeline by date (newest first)
665
+
timeline_items.sort(key=lambda x: x["date"], reverse=True)
667
+
# Limit timeline to what will actually be rendered
668
+
timeline_items = timeline_items[:50] # Limit to 50 items total
670
+
# Add cross-thread linking for repeat blog references
671
+
self._add_cross_thread_links(timeline_items)
673
+
# Prepare outgoing links data
674
+
outgoing_links = []
675
+
if self.links_data and "links" in self.links_data:
676
+
for url, link_info in self.links_data["links"].items():
677
+
referencing_entries = []
678
+
for entry_id in link_info.get("referencing_entries", []):
679
+
for username, entry in self.entries:
680
+
if entry.id == entry_id:
681
+
referencing_entries.append(
682
+
(self.get_display_name(username), entry)
686
+
if referencing_entries:
687
+
# Sort by date - prioritize updated over published
688
+
referencing_entries.sort(
689
+
key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True
691
+
outgoing_links.append(
694
+
"target_username": link_info.get("target_username"),
695
+
"entries": referencing_entries,
699
+
# Sort links by most recent reference - prioritize updated over published
700
+
outgoing_links.sort(
701
+
key=lambda x: x["entries"][0][1].updated
702
+
or x["entries"][0][1].published or datetime.min,
706
+
# Prepare users data
707
+
users: list[UserData] = []
709
+
for username, user_metadata in self.index.users.items():
710
+
# Get recent entries for this user with display names
712
+
(self.get_display_name(u), e)
713
+
for u, e in self.entries
717
+
{"metadata": user_metadata, "recent_entries": user_entries}
719
+
# Sort by entry count
720
+
users.sort(key=lambda x: x["metadata"].entry_count, reverse=True)
722
+
# Generate timeline page
723
+
timeline_template = self.env.get_template("timeline.html")
724
+
timeline_content = timeline_template.render(
727
+
timeline_items=timeline_items, # Already limited above
729
+
with open(self.output_dir / "timeline.html", "w") as f:
730
+
f.write(timeline_content)
732
+
# Generate links page
733
+
links_template = self.env.get_template("links.html")
734
+
links_content = links_template.render(
737
+
outgoing_links=outgoing_links[:100],
739
+
with open(self.output_dir / "links.html", "w") as f:
740
+
f.write(links_content)
742
+
# Generate users page
743
+
users_template = self.env.get_template("users.html")
744
+
users_content = users_template.render(
749
+
with open(self.output_dir / "users.html", "w") as f:
750
+
f.write(users_content)
752
+
# Generate main index page (redirect to timeline)
753
+
index_template = self.env.get_template("index.html")
754
+
index_content = index_template.render(**base_data)
755
+
with open(self.output_dir / "index.html", "w") as f:
756
+
f.write(index_content)
758
+
console.print(f"[green]โ[/green] Generated website at {self.output_dir}")
759
+
console.print(f" - {len(self.entries)} entries")
760
+
console.print(f" - {len(self.threads)} conversation threads")
761
+
console.print(f" - {len(outgoing_links)} outgoing links")
762
+
console.print(f" - {len(users)} users")
764
+
" - Generated pages: index.html, timeline.html, links.html, users.html"
770
+
output: Path = typer.Option(
771
+
Path("./thicket-site"),
774
+
help="Output directory for the generated website",
776
+
force: bool = typer.Option(
777
+
False, "--force", "-f", help="Overwrite existing output directory"
779
+
config_file: Path = typer.Option(
780
+
Path("thicket.yaml"), "--config", help="Configuration file path"
783
+
"""Generate a static HTML website from thicket data."""
784
+
config = load_config(config_file)
786
+
if not config.git_store:
787
+
console.print("[red]No git store path configured[/red]")
788
+
raise typer.Exit(1)
790
+
git_store = GitStore(config.git_store)
792
+
# Check if output directory exists
793
+
if output.exists() and not force:
795
+
f"[red]Output directory {output} already exists. Use --force to overwrite.[/red]"
797
+
raise typer.Exit(1)
799
+
# Clean output directory if forcing
800
+
if output.exists() and force:
801
+
shutil.rmtree(output)
804
+
generator = WebsiteGenerator(git_store, output)
806
+
console.print("[bold]Generating static website...[/bold]")
807
+
generator.load_data()
808
+
generator.build_threads()
809
+
generator.generate_site()
811
+
except Exception as e:
812
+
console.print(f"[red]Error generating website: {e}[/red]")
813
+
raise typer.Exit(1) from e
816
+
<file path="src/thicket/templates/base.html">
820
+
<meta charset="UTF-8">
821
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
822
+
<title>{% block page_title %}{{ title }}{% endblock %}</title>
823
+
<link rel="stylesheet" href="css/style.css">
826
+
<header class="site-header">
827
+
<div class="header-content">
828
+
<h1 class="site-title">{{ title }}</h1>
829
+
<nav class="site-nav">
830
+
<a href="timeline.html" class="nav-link {% if page == 'timeline' %}active{% endif %}">Timeline</a>
831
+
<a href="links.html" class="nav-link {% if page == 'links' %}active{% endif %}">Links</a>
832
+
<a href="users.html" class="nav-link {% if page == 'users' %}active{% endif %}">Users</a>
837
+
<main class="main-content">
838
+
{% block content %}{% endblock %}
841
+
<footer class="site-footer">
842
+
<p>Generated on {{ generated_at }} by <a href="https://github.com/avsm/thicket">Thicket</a></p>
845
+
<script src="js/script.js"></script>
850
+
<file path="src/thicket/templates/index.html">
854
+
<meta charset="UTF-8">
855
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
856
+
<title>{{ title }}</title>
857
+
<meta http-equiv="refresh" content="0; url=timeline.html">
858
+
<link rel="canonical" href="timeline.html">
861
+
<p>Redirecting to <a href="timeline.html">Timeline</a>...</p>
866
+
<file path="src/thicket/templates/links.html">
867
+
{% extends "base.html" %}
869
+
{% block page_title %}Outgoing Links - {{ title }}{% endblock %}
871
+
{% block content %}
872
+
<div class="page-content">
873
+
<h2>Outgoing Links</h2>
874
+
<p class="page-description">External links referenced in blog posts, ordered by most recent reference.</p>
876
+
{% for link in outgoing_links %}
877
+
<article class="link-group">
878
+
<h3 class="link-url">
879
+
<a href="{{ link.url }}" target="_blank">{{ link.url|truncate(80) }}</a>
880
+
{% if link.target_username %}
881
+
<span class="target-user">({{ link.target_username }})</span>
884
+
<div class="referencing-entries">
885
+
<span class="ref-count">Referenced in {{ link.entries|length }} post(s):</span>
887
+
{% for display_name, entry in link.entries[:5] %}
889
+
<span class="author">{{ display_name }}</span> -
890
+
<a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a>
891
+
<time datetime="{{ entry.updated or entry.published }}">
892
+
({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }})
896
+
{% if link.entries|length > 5 %}
897
+
<li class="more">... and {{ link.entries|length - 5 }} more</li>
907
+
<file path="src/thicket/templates/script.js">
908
+
// Enhanced functionality for thicket website
909
+
document.addEventListener('DOMContentLoaded', function() {
911
+
// Enhance thread collapsing (optional feature)
912
+
const threadHeaders = document.querySelectorAll('.thread-header');
913
+
threadHeaders.forEach(header => {
914
+
header.style.cursor = 'pointer';
915
+
header.addEventListener('click', function() {
916
+
const thread = this.parentElement;
917
+
const entries = thread.querySelectorAll('.thread-entry');
919
+
// Toggle visibility of all but the first entry
920
+
for (let i = 1; i < entries.length; i++) {
921
+
entries[i].style.display = entries[i].style.display === 'none' ? 'block' : 'none';
924
+
// Update thread count text
925
+
const count = this.querySelector('.thread-count');
926
+
if (entries[1] && entries[1].style.display === 'none') {
927
+
count.textContent = count.textContent.replace('posts', 'posts (collapsed)');
929
+
count.textContent = count.textContent.replace(' (collapsed)', '');
934
+
// Add relative time display
935
+
const timeElements = document.querySelectorAll('time');
936
+
timeElements.forEach(timeEl => {
937
+
const datetime = new Date(timeEl.getAttribute('datetime'));
938
+
const now = new Date();
939
+
const diffMs = now - datetime;
940
+
const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
943
+
if (diffDays === 0) {
944
+
const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
945
+
if (diffHours === 0) {
946
+
const diffMinutes = Math.floor(diffMs / (1000 * 60));
947
+
relativeTime = diffMinutes === 0 ? 'just now' : `${diffMinutes}m ago`;
949
+
relativeTime = `${diffHours}h ago`;
951
+
} else if (diffDays === 1) {
952
+
relativeTime = 'yesterday';
953
+
} else if (diffDays < 7) {
954
+
relativeTime = `${diffDays}d ago`;
955
+
} else if (diffDays < 30) {
956
+
const weeks = Math.floor(diffDays / 7);
957
+
relativeTime = weeks === 1 ? '1w ago' : `${weeks}w ago`;
958
+
} else if (diffDays < 365) {
959
+
const months = Math.floor(diffDays / 30);
960
+
relativeTime = months === 1 ? '1mo ago' : `${months}mo ago`;
962
+
const years = Math.floor(diffDays / 365);
963
+
relativeTime = years === 1 ? '1y ago' : `${years}y ago`;
966
+
// Add relative time as title attribute
967
+
timeEl.setAttribute('title', timeEl.textContent);
968
+
timeEl.textContent = relativeTime;
971
+
// Enhanced anchor link scrolling for shared references
972
+
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
973
+
anchor.addEventListener('click', function (e) {
974
+
e.preventDefault();
975
+
const target = document.querySelector(this.getAttribute('href'));
977
+
target.scrollIntoView({
978
+
behavior: 'smooth',
982
+
// Highlight the target briefly
983
+
const timelineEntry = target.closest('.timeline-entry');
984
+
if (timelineEntry) {
985
+
timelineEntry.style.outline = '2px solid var(--primary-color)';
986
+
timelineEntry.style.borderRadius = '8px';
988
+
timelineEntry.style.outline = '';
989
+
timelineEntry.style.borderRadius = '';
998
+
<file path="src/thicket/templates/style.css">
999
+
/* Modern, clean design with high-density text and readable theme */
1002
+
--primary-color: #2c3e50;
1003
+
--secondary-color: #3498db;
1004
+
--accent-color: #e74c3c;
1005
+
--background: #ffffff;
1006
+
--surface: #f8f9fa;
1007
+
--text-primary: #2c3e50;
1008
+
--text-secondary: #7f8c8d;
1009
+
--border-color: #e0e0e0;
1010
+
--thread-indent: 20px;
1011
+
--max-width: 1200px;
1017
+
box-sizing: border-box;
1021
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif;
1024
+
color: var(--text-primary);
1025
+
background-color: var(--background);
1030
+
background-color: var(--surface);
1031
+
border-bottom: 1px solid var(--border-color);
1032
+
padding: 0.75rem 0;
1039
+
max-width: var(--max-width);
1043
+
justify-content: space-between;
1044
+
align-items: center;
1048
+
font-size: 1.5rem;
1050
+
color: var(--primary-color);
1061
+
text-decoration: none;
1062
+
color: var(--text-secondary);
1064
+
font-size: 0.95rem;
1065
+
padding: 0.5rem 0.75rem;
1066
+
border-radius: 4px;
1067
+
transition: all 0.2s ease;
1071
+
color: var(--primary-color);
1072
+
background-color: var(--background);
1075
+
.nav-link.active {
1076
+
color: var(--secondary-color);
1077
+
background-color: var(--background);
1081
+
/* Main Content */
1083
+
max-width: var(--max-width);
1084
+
margin: 2rem auto;
1092
+
.page-description {
1093
+
color: var(--text-secondary);
1094
+
margin-bottom: 1.5rem;
1095
+
font-style: italic;
1100
+
margin-bottom: 2rem;
1104
+
font-size: 1.3rem;
1106
+
margin-bottom: 0.75rem;
1107
+
color: var(--primary-color);
1111
+
font-size: 1.1rem;
1113
+
margin-bottom: 0.75rem;
1114
+
color: var(--primary-color);
1117
+
/* Entries and Threads */
1119
+
margin-bottom: 1.5rem;
1121
+
background-color: var(--surface);
1122
+
border-radius: 4px;
1123
+
border: 1px solid var(--border-color);
1126
+
/* Timeline-style entries */
1128
+
margin-bottom: 0.5rem;
1129
+
padding: 0.5rem 0.75rem;
1131
+
background: transparent;
1132
+
transition: background-color 0.2s ease;
1135
+
.timeline-entry:hover {
1136
+
background-color: var(--surface);
1140
+
display: inline-flex;
1142
+
align-items: center;
1143
+
font-size: 0.75rem;
1144
+
color: var(--text-secondary);
1145
+
margin-bottom: 0.25rem;
1149
+
font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
1150
+
font-size: 0.75rem;
1151
+
color: var(--text-secondary);
1154
+
.timeline-author {
1156
+
color: var(--primary-color);
1157
+
font-size: 0.8rem;
1158
+
text-decoration: none;
1161
+
.timeline-author:hover {
1162
+
color: var(--secondary-color);
1163
+
text-decoration: underline;
1166
+
.timeline-content {
1171
+
font-size: 0.95rem;
1175
+
.timeline-title a {
1176
+
color: var(--primary-color);
1177
+
text-decoration: none;
1180
+
.timeline-title a:hover {
1181
+
color: var(--secondary-color);
1182
+
text-decoration: underline;
1185
+
.timeline-summary {
1186
+
color: var(--text-secondary);
1187
+
font-size: 0.9rem;
1191
+
/* Legacy styles for other sections */
1192
+
.entry-meta, .thread-header {
1195
+
align-items: center;
1196
+
margin-bottom: 0.5rem;
1197
+
font-size: 0.85rem;
1198
+
color: var(--text-secondary);
1203
+
color: var(--primary-color);
1207
+
font-size: 0.85rem;
1211
+
font-size: 1.1rem;
1213
+
margin-bottom: 0.5rem;
1217
+
color: var(--primary-color);
1218
+
text-decoration: none;
1222
+
color: var(--secondary-color);
1223
+
text-decoration: underline;
1227
+
color: var(--text-primary);
1229
+
margin-top: 0.5rem;
1232
+
/* Enhanced Threading Styles */
1234
+
/* Conversation Clusters */
1235
+
.conversation-cluster {
1236
+
background-color: var(--background);
1237
+
border: 2px solid var(--border-color);
1238
+
border-radius: 8px;
1239
+
margin-bottom: 2rem;
1241
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
1244
+
.conversation-header {
1245
+
background: linear-gradient(135deg, var(--surface) 0%, #f1f3f4 100%);
1246
+
padding: 0.75rem 1rem;
1247
+
border-bottom: 1px solid var(--border-color);
1250
+
.conversation-meta {
1252
+
justify-content: space-between;
1253
+
align-items: center;
1258
+
.conversation-count {
1260
+
color: var(--secondary-color);
1261
+
font-size: 0.9rem;
1264
+
.conversation-participants {
1265
+
font-size: 0.8rem;
1266
+
color: var(--text-secondary);
1268
+
text-align: right;
1271
+
.conversation-flow {
1275
+
/* Threaded Conversation Entries */
1276
+
.conversation-entry {
1277
+
position: relative;
1278
+
margin-bottom: 0.75rem;
1280
+
align-items: flex-start;
1283
+
.conversation-entry.level-0 {
1287
+
.conversation-entry.level-1 {
1288
+
margin-left: 1.5rem;
1291
+
.conversation-entry.level-2 {
1292
+
margin-left: 3rem;
1295
+
.conversation-entry.level-3 {
1296
+
margin-left: 4.5rem;
1299
+
.conversation-entry.level-4 {
1300
+
margin-left: 6rem;
1303
+
.entry-connector {
1305
+
background-color: var(--secondary-color);
1306
+
margin-right: 0.75rem;
1307
+
margin-top: 0.25rem;
1309
+
border-radius: 2px;
1313
+
.conversation-entry.level-0 .entry-connector {
1314
+
background-color: var(--accent-color);
1320
+
background-color: var(--surface);
1322
+
border-radius: 6px;
1323
+
border: 1px solid var(--border-color);
1324
+
transition: all 0.2s ease;
1327
+
.entry-content:hover {
1328
+
border-color: var(--secondary-color);
1329
+
box-shadow: 0 2px 8px rgba(52, 152, 219, 0.1);
1332
+
/* Reference Indicators */
1333
+
.reference-indicators {
1334
+
display: inline-flex;
1336
+
margin-left: 0.5rem;
1339
+
.ref-out, .ref-in {
1340
+
display: inline-block;
1343
+
border-radius: 50%;
1344
+
text-align: center;
1345
+
line-height: 1rem;
1346
+
font-size: 0.7rem;
1347
+
font-weight: bold;
1351
+
background-color: #e8f5e8;
1356
+
background-color: #e8f0ff;
1360
+
/* Reference Badges for Individual Posts */
1361
+
.timeline-entry.with-references {
1362
+
background-color: var(--surface);
1365
+
/* Conversation posts in unified timeline */
1366
+
.timeline-entry.conversation-post {
1367
+
background: transparent;
1369
+
margin-bottom: 0.5rem;
1370
+
padding: 0.5rem 0.75rem;
1373
+
.timeline-entry.conversation-post.level-0 {
1375
+
border-left: 2px solid var(--accent-color);
1376
+
padding-left: 0.75rem;
1379
+
.timeline-entry.conversation-post.level-1 {
1380
+
margin-left: 1.5rem;
1381
+
border-left: 2px solid var(--secondary-color);
1382
+
padding-left: 0.75rem;
1385
+
.timeline-entry.conversation-post.level-2 {
1386
+
margin-left: 3rem;
1387
+
border-left: 2px solid var(--text-secondary);
1388
+
padding-left: 0.75rem;
1391
+
.timeline-entry.conversation-post.level-3 {
1392
+
margin-left: 4.5rem;
1393
+
border-left: 2px solid var(--text-secondary);
1394
+
padding-left: 0.75rem;
1397
+
.timeline-entry.conversation-post.level-4 {
1398
+
margin-left: 6rem;
1399
+
border-left: 2px solid var(--text-secondary);
1400
+
padding-left: 0.75rem;
1403
+
/* Cross-thread linking */
1404
+
.cross-thread-links {
1405
+
margin-top: 0.5rem;
1406
+
padding-top: 0.5rem;
1407
+
border-top: 1px solid var(--border-color);
1410
+
.cross-thread-indicator {
1411
+
font-size: 0.75rem;
1412
+
color: var(--text-secondary);
1413
+
background-color: var(--surface);
1414
+
padding: 0.25rem 0.5rem;
1415
+
border-radius: 12px;
1416
+
border: 1px solid var(--border-color);
1417
+
display: inline-block;
1420
+
/* Inline shared references styling */
1421
+
.inline-shared-refs {
1422
+
margin-left: 0.5rem;
1423
+
font-size: 0.85rem;
1424
+
color: var(--text-secondary);
1427
+
.shared-ref-link {
1428
+
color: var(--primary-color);
1429
+
text-decoration: none;
1431
+
transition: color 0.2s ease;
1434
+
.shared-ref-link:hover {
1435
+
color: var(--secondary-color);
1436
+
text-decoration: underline;
1439
+
.shared-ref-more {
1440
+
font-style: italic;
1441
+
color: var(--text-secondary);
1442
+
font-size: 0.8rem;
1443
+
margin-left: 0.25rem;
1446
+
.user-anchor, .post-anchor {
1447
+
position: absolute;
1448
+
margin-top: -60px; /* Offset for fixed header */
1449
+
pointer-events: none;
1452
+
.cross-thread-link {
1453
+
color: var(--primary-color);
1454
+
text-decoration: none;
1456
+
transition: color 0.2s ease;
1459
+
.cross-thread-link:hover {
1460
+
color: var(--secondary-color);
1461
+
text-decoration: underline;
1464
+
.reference-badges {
1467
+
margin-left: 0.5rem;
1472
+
display: inline-block;
1473
+
padding: 0.1rem 0.4rem;
1474
+
border-radius: 12px;
1475
+
font-size: 0.7rem;
1477
+
text-transform: uppercase;
1478
+
letter-spacing: 0.05em;
1481
+
.ref-badge.ref-outbound {
1482
+
background-color: #e8f5e8;
1484
+
border: 1px solid #c3e6c3;
1487
+
.ref-badge.ref-inbound {
1488
+
background-color: #e8f0ff;
1490
+
border: 1px solid #b3d9ff;
1493
+
/* Author Color Coding */
1494
+
.timeline-author {
1495
+
position: relative;
1498
+
.timeline-author::before {
1500
+
display: inline-block;
1503
+
border-radius: 50%;
1504
+
margin-right: 0.5rem;
1505
+
background-color: var(--secondary-color);
1508
+
/* Generate consistent colors for authors */
1509
+
.author-avsm::before { background-color: #e74c3c; }
1510
+
.author-mort::before { background-color: #3498db; }
1511
+
.author-mte::before { background-color: #2ecc71; }
1512
+
.author-ryan::before { background-color: #f39c12; }
1513
+
.author-mwd::before { background-color: #9b59b6; }
1514
+
.author-dra::before { background-color: #1abc9c; }
1515
+
.author-pf341::before { background-color: #34495e; }
1516
+
.author-sadiqj::before { background-color: #e67e22; }
1517
+
.author-martinkl::before { background-color: #8e44ad; }
1518
+
.author-jonsterling::before { background-color: #27ae60; }
1519
+
.author-jon::before { background-color: #f1c40f; }
1520
+
.author-onkar::before { background-color: #e91e63; }
1521
+
.author-gabriel::before { background-color: #00bcd4; }
1522
+
.author-jess::before { background-color: #ff5722; }
1523
+
.author-ibrahim::before { background-color: #607d8b; }
1524
+
.author-andres::before { background-color: #795548; }
1525
+
.author-eeg::before { background-color: #ff9800; }
1527
+
/* Section Headers */
1528
+
.conversations-section h3,
1529
+
.referenced-posts-section h3,
1530
+
.individual-posts-section h3 {
1531
+
border-bottom: 2px solid var(--border-color);
1532
+
padding-bottom: 0.5rem;
1533
+
margin-bottom: 1.5rem;
1534
+
position: relative;
1537
+
.conversations-section h3::before {
1539
+
margin-right: 0.5rem;
1542
+
.referenced-posts-section h3::before {
1544
+
margin-right: 0.5rem;
1547
+
.individual-posts-section h3::before {
1549
+
margin-right: 0.5rem;
1552
+
/* Legacy thread styles (for backward compatibility) */
1554
+
background-color: var(--background);
1555
+
border: 1px solid var(--border-color);
1558
+
margin-bottom: 1rem;
1562
+
background-color: var(--surface);
1563
+
padding: 0.5rem 0.75rem;
1564
+
border-bottom: 1px solid var(--border-color);
1569
+
color: var(--secondary-color);
1573
+
padding: 0.5rem 0.75rem;
1574
+
border-bottom: 1px solid var(--border-color);
1577
+
.thread-entry:last-child {
1578
+
border-bottom: none;
1581
+
.thread-entry.reply {
1582
+
margin-left: var(--thread-indent);
1583
+
border-left: 3px solid var(--secondary-color);
1584
+
background-color: var(--surface);
1587
+
/* Links Section */
1589
+
background-color: var(--background);
1594
+
word-break: break-word;
1598
+
color: var(--secondary-color);
1599
+
text-decoration: none;
1602
+
.link-url a:hover {
1603
+
text-decoration: underline;
1607
+
font-size: 0.9rem;
1608
+
color: var(--text-secondary);
1609
+
font-weight: normal;
1612
+
.referencing-entries {
1613
+
margin-top: 0.75rem;
1618
+
color: var(--text-secondary);
1619
+
font-size: 0.9rem;
1622
+
.referencing-entries ul {
1624
+
margin-top: 0.5rem;
1625
+
padding-left: 1rem;
1628
+
.referencing-entries li {
1629
+
margin-bottom: 0.25rem;
1630
+
font-size: 0.9rem;
1633
+
.referencing-entries .more {
1634
+
font-style: italic;
1635
+
color: var(--text-secondary);
1638
+
/* Users Section */
1640
+
background-color: var(--background);
1646
+
align-items: start;
1647
+
margin-bottom: 1rem;
1653
+
border-radius: 50%;
1654
+
object-fit: cover;
1658
+
margin-bottom: 0.25rem;
1662
+
font-size: 0.9rem;
1663
+
color: var(--text-secondary);
1664
+
font-weight: normal;
1668
+
font-size: 0.9rem;
1669
+
color: var(--text-secondary);
1673
+
color: var(--secondary-color);
1674
+
text-decoration: none;
1677
+
.user-meta a:hover {
1678
+
text-decoration: underline;
1690
+
font-size: 0.95rem;
1691
+
margin-bottom: 0.5rem;
1692
+
color: var(--text-secondary);
1701
+
margin-bottom: 0.25rem;
1702
+
font-size: 0.9rem;
1707
+
max-width: var(--max-width);
1708
+
margin: 3rem auto 2rem;
1709
+
padding: 1rem 2rem;
1710
+
text-align: center;
1711
+
color: var(--text-secondary);
1712
+
font-size: 0.85rem;
1713
+
border-top: 1px solid var(--border-color);
1717
+
color: var(--secondary-color);
1718
+
text-decoration: none;
1721
+
.site-footer a:hover {
1722
+
text-decoration: underline;
1726
+
@media (max-width: 768px) {
1728
+
font-size: 1.3rem;
1732
+
flex-direction: column;
1734
+
align-items: flex-start;
1745
+
.thread-entry.reply {
1746
+
margin-left: calc(var(--thread-indent) / 2);
1750
+
flex-direction: column;
1755
+
<file path="src/thicket/templates/timeline.html">
1756
+
{% extends "base.html" %}
1758
+
{% block page_title %}Timeline - {{ title }}{% endblock %}
1760
+
{% block content %}
1761
+
{% set seen_users = [] %}
1762
+
<div class="page-content">
1763
+
<h2>Recent Posts & Conversations</h2>
1765
+
<section class="unified-timeline">
1766
+
{% for item in timeline_items %}
1767
+
{% if item.type == "post" %}
1768
+
<!-- Individual Post -->
1769
+
<article class="timeline-entry {% if item.content.references %}with-references{% endif %}">
1770
+
<div class="timeline-meta">
1771
+
<time datetime="{{ item.content.entry.updated or item.content.entry.published }}" class="timeline-time">
1772
+
{{ (item.content.entry.updated or item.content.entry.published).strftime('%Y-%m-%d %H:%M') }}
1774
+
{% set homepage = get_user_homepage(item.content.username) %}
1775
+
{% if item.content.username not in seen_users %}
1776
+
<a id="{{ item.content.username }}" class="user-anchor"></a>
1777
+
{% set _ = seen_users.append(item.content.username) %}
1779
+
<a id="post-{{ loop.index0 }}-{{ safe_anchor_id(item.content.entry.id) }}" class="post-anchor"></a>
1781
+
<a href="{{ homepage }}" target="_blank" class="timeline-author">{{ item.content.display_name }}</a>
1783
+
<span class="timeline-author">{{ item.content.display_name }}</span>
1785
+
{% if item.content.references %}
1786
+
<div class="reference-badges">
1787
+
{% for ref in item.content.references %}
1788
+
{% if ref.type == 'outbound' %}
1789
+
<span class="ref-badge ref-outbound" title="References {{ ref.target_username or 'external post' }}">
1790
+
โ {{ ref.target_username or 'ext' }}
1792
+
{% elif ref.type == 'inbound' %}
1793
+
<span class="ref-badge ref-inbound" title="Referenced by {{ ref.source_username or 'external post' }}">
1794
+
โ {{ ref.source_username or 'ext' }}
1801
+
<div class="timeline-content">
1802
+
<strong class="timeline-title">
1803
+
<a href="{{ item.content.entry.link }}" target="_blank">{{ item.content.entry.title }}</a>
1805
+
{% if item.content.entry.summary %}
1806
+
<span class="timeline-summary">โ {{ clean_html_summary(item.content.entry.summary, 250) }}</span>
1808
+
{% if item.content.shared_references %}
1809
+
<span class="inline-shared-refs">
1810
+
{% for ref in item.content.shared_references[:3] %}
1811
+
{% if ref.target_username %}
1812
+
<a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %}
1815
+
{% if item.content.shared_references|length > 3 %}
1816
+
<span class="shared-ref-more">+{{ item.content.shared_references|length - 3 }} more</span>
1820
+
{% if item.content.cross_thread_links %}
1821
+
<div class="cross-thread-links">
1822
+
<span class="cross-thread-indicator">๐ Also appears: </span>
1823
+
{% for link in item.content.cross_thread_links %}
1824
+
<a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %}
1831
+
{% elif item.type == "thread" %}
1832
+
<!-- Conversation Thread -->
1833
+
{% set outer_loop_index = loop.index0 %}
1834
+
{% for thread_item in item.content %}
1835
+
<article class="timeline-entry conversation-post level-{{ thread_item.thread_level }}">
1836
+
<div class="timeline-meta">
1837
+
<time datetime="{{ thread_item.entry.updated or thread_item.entry.published }}" class="timeline-time">
1838
+
{{ (thread_item.entry.updated or thread_item.entry.published).strftime('%Y-%m-%d %H:%M') }}
1840
+
{% set homepage = get_user_homepage(thread_item.username) %}
1841
+
{% if thread_item.username not in seen_users %}
1842
+
<a id="{{ thread_item.username }}" class="user-anchor"></a>
1843
+
{% set _ = seen_users.append(thread_item.username) %}
1845
+
<a id="post-{{ outer_loop_index }}-{{ loop.index0 }}-{{ safe_anchor_id(thread_item.entry.id) }}" class="post-anchor"></a>
1847
+
<a href="{{ homepage }}" target="_blank" class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</a>
1849
+
<span class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</span>
1851
+
{% if thread_item.references_to or thread_item.referenced_by %}
1852
+
<span class="reference-indicators">
1853
+
{% if thread_item.references_to %}
1854
+
<span class="ref-out" title="References other posts">โ</span>
1856
+
{% if thread_item.referenced_by %}
1857
+
<span class="ref-in" title="Referenced by other posts">โ</span>
1862
+
<div class="timeline-content">
1863
+
<strong class="timeline-title">
1864
+
<a href="{{ thread_item.entry.link }}" target="_blank">{{ thread_item.entry.title }}</a>
1866
+
{% if thread_item.entry.summary %}
1867
+
<span class="timeline-summary">โ {{ clean_html_summary(thread_item.entry.summary, 300) }}</span>
1869
+
{% if thread_item.shared_references %}
1870
+
<span class="inline-shared-refs">
1871
+
{% for ref in thread_item.shared_references[:3] %}
1872
+
{% if ref.target_username %}
1873
+
<a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %}
1876
+
{% if thread_item.shared_references|length > 3 %}
1877
+
<span class="shared-ref-more">+{{ thread_item.shared_references|length - 3 }} more</span>
1881
+
{% if thread_item.cross_thread_links %}
1882
+
<div class="cross-thread-links">
1883
+
<span class="cross-thread-indicator">๐ Also appears: </span>
1884
+
{% for link in thread_item.cross_thread_links %}
1885
+
<a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %}
1899
+
<file path="src/thicket/templates/users.html">
1900
+
{% extends "base.html" %}
1902
+
{% block page_title %}Users - {{ title }}{% endblock %}
1904
+
{% block content %}
1905
+
<div class="page-content">
1907
+
<p class="page-description">All users contributing to this thicket, ordered by post count.</p>
1909
+
{% for user_info in users %}
1910
+
<article class="user-card">
1911
+
<div class="user-header">
1912
+
{% if user_info.metadata.icon and user_info.metadata.icon != "None" %}
1913
+
<img src="{{ user_info.metadata.icon }}" alt="{{ user_info.metadata.username }}" class="user-icon">
1915
+
<div class="user-info">
1917
+
{% if user_info.metadata.display_name %}
1918
+
{{ user_info.metadata.display_name }}
1919
+
<span class="username">({{ user_info.metadata.username }})</span>
1921
+
{{ user_info.metadata.username }}
1924
+
<div class="user-meta">
1925
+
{% if user_info.metadata.homepage %}
1926
+
<a href="{{ user_info.metadata.homepage }}" target="_blank">{{ user_info.metadata.homepage }}</a>
1928
+
{% if user_info.metadata.email %}
1929
+
<span class="separator">โข</span>
1930
+
<a href="mailto:{{ user_info.metadata.email }}">{{ user_info.metadata.email }}</a>
1932
+
<span class="separator">โข</span>
1933
+
<span class="post-count">{{ user_info.metadata.entry_count }} posts</span>
1938
+
{% if user_info.recent_entries %}
1939
+
<div class="user-recent">
1940
+
<h4>Recent posts:</h4>
1942
+
{% for display_name, entry in user_info.recent_entries %}
1944
+
<a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a>
1945
+
<time datetime="{{ entry.updated or entry.published }}">
1946
+
({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }})
1959
+
<file path="README.md">
1962
+
A modern CLI tool for persisting Atom/RSS feeds in Git repositories, designed to enable distributed webblog comment structures.
1966
+
- **Feed Auto-Discovery**: Automatically extracts user metadata from Atom/RSS feeds
1967
+
- **Git Storage**: Stores feed entries in a Git repository with full history
1968
+
- **Duplicate Management**: Manual curation of duplicate entries across feeds
1969
+
- **Modern CLI**: Built with Typer and Rich for beautiful terminal output
1970
+
- **Comprehensive Parsing**: Supports RSS 0.9x, RSS 1.0, RSS 2.0, and Atom feeds
1971
+
- **Cron-Friendly**: Designed for scheduled execution
1976
+
# Install from source
1979
+
# Or install with dev dependencies
1980
+
pip install -e .[dev]
1985
+
1. **Initialize a new thicket repository:**
1987
+
thicket init ./my-feeds
1990
+
2. **Add a user with their feed:**
1992
+
thicket add user "alice" --feed "https://alice.example.com/feed.xml"
1995
+
3. **Sync feeds to download entries:**
1997
+
thicket sync --all
2000
+
4. **List users and feeds:**
2002
+
thicket list users
2003
+
thicket list feeds
2004
+
thicket list entries
2011
+
thicket init <git-store-path> [--cache-dir <path>] [--config <config-file>]
2014
+
### Add Users and Feeds
2016
+
# Add user with auto-discovery
2017
+
thicket add user "username" --feed "https://example.com/feed.xml"
2019
+
# Add user with manual metadata
2020
+
thicket add user "username" \
2021
+
--feed "https://example.com/feed.xml" \
2022
+
--email "user@example.com" \
2023
+
--homepage "https://example.com" \
2024
+
--display-name "User Name"
2026
+
# Add additional feed to existing user
2027
+
thicket add feed "username" "https://example.com/other-feed.xml"
2033
+
thicket sync --all
2035
+
# Sync specific user
2036
+
thicket sync --user "username"
2038
+
# Dry run (preview changes)
2039
+
thicket sync --all --dry-run
2042
+
### List Information
2045
+
thicket list users
2048
+
thicket list feeds
2050
+
# List feeds for specific user
2051
+
thicket list feeds --user "username"
2053
+
# List recent entries
2054
+
thicket list entries --limit 20
2056
+
# List entries for specific user
2057
+
thicket list entries --user "username"
2060
+
### Manage Duplicates
2062
+
# List duplicate mappings
2063
+
thicket duplicates list
2065
+
# Mark entries as duplicates
2066
+
thicket duplicates add "https://example.com/dup" "https://example.com/canonical"
2068
+
# Remove duplicate mapping
2069
+
thicket duplicates remove "https://example.com/dup"
2074
+
Thicket uses a YAML configuration file (default: `thicket.yaml`):
2077
+
git_store: ./feeds-repo
2078
+
cache_dir: ~/.cache/thicket
2082
+
- https://alice.example.com/feed.xml
2083
+
email: alice@example.com
2084
+
homepage: https://alice.example.com
2085
+
display_name: Alice
2088
+
## Git Repository Structure
2092
+
โโโ index.json # User directory index
2093
+
โโโ duplicates.json # Duplicate entry mappings
2095
+
โ โโโ metadata.json # User metadata
2096
+
โ โโโ entry_id_1.json # Feed entries
2097
+
โ โโโ entry_id_2.json
2106
+
# Install in development mode
2107
+
pip install -e .[dev]
2114
+
black --check src/
2116
+
# Run type checking
2122
+
- **CLI**: Modern interface with Typer and Rich
2123
+
- **Feed Processing**: Universal parsing with feedparser
2124
+
- **Git Storage**: Structured storage with GitPython
2125
+
- **Data Models**: Pydantic for validation and serialization
2126
+
- **Async HTTP**: httpx for efficient feed fetching
2130
+
- **Blog Aggregation**: Collect and archive blog posts from multiple sources
2131
+
- **Comment Networks**: Enable distributed commenting systems
2132
+
- **Feed Archival**: Preserve feed history beyond typical feed depth limits
2133
+
- **Content Curation**: Manage and deduplicate content across feeds
2137
+
MIT License - see LICENSE file for details.
2140
+
<file path="src/thicket/cli/commands/index_cmd.py">
2141
+
"""CLI command for building reference index from blog entries."""
2144
+
from pathlib import Path
2145
+
from typing import Optional
2148
+
from rich.console import Console
2149
+
from rich.progress import (
2153
+
TaskProgressColumn,
2156
+
from rich.table import Table
2158
+
from ...core.git_store import GitStore
2159
+
from ...core.reference_parser import ReferenceIndex, ReferenceParser
2160
+
from ..main import app
2161
+
from ..utils import get_tsv_mode, load_config
2163
+
console = Console()
2168
+
config_file: Optional[Path] = typer.Option(
2172
+
help="Path to configuration file",
2174
+
output_file: Optional[Path] = typer.Option(
2178
+
help="Path to output index file (default: updates links.json in git store)",
2180
+
verbose: bool = typer.Option(
2184
+
help="Show detailed progress information",
2187
+
"""Build a reference index showing which blog entries reference others.
2189
+
This command analyzes all blog entries to detect cross-references between
2190
+
different blogs, creating an index that can be used to build threaded
2191
+
views of related content.
2193
+
Updates the unified links.json file with reference data.
2196
+
# Load configuration
2197
+
config = load_config(config_file)
2199
+
# Initialize Git store
2200
+
git_store = GitStore(config.git_store)
2202
+
# Initialize reference parser
2203
+
parser = ReferenceParser()
2205
+
# Build user domain mapping
2207
+
console.print("Building user domain mapping...")
2208
+
user_domains = parser.build_user_domain_mapping(git_store)
2211
+
console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")
2213
+
# Initialize reference index
2214
+
ref_index = ReferenceIndex()
2215
+
ref_index.user_domains = user_domains
2218
+
index = git_store._load_index()
2219
+
users = list(index.users.keys())
2222
+
console.print("[yellow]No users found in Git store[/yellow]")
2223
+
raise typer.Exit(0)
2225
+
# Process all entries
2227
+
total_references = 0
2228
+
all_references = []
2232
+
TextColumn("[progress.description]{task.description}"),
2234
+
TaskProgressColumn(),
2238
+
# Count total entries first
2239
+
counting_task = progress.add_task("Counting entries...", total=len(users))
2241
+
for username in users:
2242
+
entries = git_store.list_entries(username)
2243
+
entry_counts[username] = len(entries)
2244
+
total_entries += len(entries)
2245
+
progress.advance(counting_task)
2247
+
progress.remove_task(counting_task)
2249
+
# Process entries - extract references
2250
+
processing_task = progress.add_task(
2251
+
f"Extracting references from {total_entries} entries...",
2252
+
total=total_entries
2255
+
for username in users:
2256
+
entries = git_store.list_entries(username)
2258
+
for entry in entries:
2259
+
# Extract references from this entry
2260
+
references = parser.extract_references(entry, username, user_domains)
2261
+
all_references.extend(references)
2263
+
progress.advance(processing_task)
2265
+
if verbose and references:
2266
+
console.print(f" Found {len(references)} references in {username}:{entry.title[:50]}...")
2268
+
progress.remove_task(processing_task)
2270
+
# Resolve target_entry_ids for references
2271
+
if all_references:
2272
+
resolve_task = progress.add_task(
2273
+
f"Resolving {len(all_references)} references...",
2274
+
total=len(all_references)
2278
+
console.print(f"Resolving target entry IDs for {len(all_references)} references...")
2280
+
resolved_references = parser.resolve_target_entry_ids(all_references, git_store)
2282
+
# Count resolved references
2283
+
resolved_count = sum(1 for ref in resolved_references if ref.target_entry_id is not None)
2285
+
console.print(f"Resolved {resolved_count} out of {len(all_references)} references")
2287
+
# Add resolved references to index
2288
+
for ref in resolved_references:
2289
+
ref_index.add_reference(ref)
2290
+
total_references += 1
2291
+
progress.advance(resolve_task)
2293
+
progress.remove_task(resolve_task)
2295
+
# Determine output path
2297
+
output_path = output_file
2299
+
output_path = config.git_store / "links.json"
2301
+
# Load existing links data or create new structure
2302
+
if output_path.exists() and not output_file:
2303
+
# Load existing unified structure
2304
+
with open(output_path) as f:
2305
+
existing_data = json.load(f)
2307
+
# Create new structure
2310
+
"reverse_mapping": {},
2311
+
"user_domains": {}
2314
+
# Update with reference data
2315
+
existing_data["references"] = ref_index.to_dict()["references"]
2316
+
existing_data["user_domains"] = {k: list(v) for k, v in user_domains.items()}
2318
+
# Save updated structure
2319
+
with open(output_path, "w") as f:
2320
+
json.dump(existing_data, f, indent=2, default=str)
2323
+
if not get_tsv_mode():
2324
+
console.print("\n[green]โ Reference index built successfully[/green]")
2326
+
# Create summary table or TSV output
2327
+
if get_tsv_mode():
2328
+
print("Metric\tCount")
2329
+
print(f"Total Users\t{len(users)}")
2330
+
print(f"Total Entries\t{total_entries}")
2331
+
print(f"Total References\t{total_references}")
2332
+
print(f"Outbound Refs\t{len(ref_index.outbound_refs)}")
2333
+
print(f"Inbound Refs\t{len(ref_index.inbound_refs)}")
2334
+
print(f"Output File\t{output_path}")
2336
+
table = Table(title="Reference Index Summary")
2337
+
table.add_column("Metric", style="cyan")
2338
+
table.add_column("Count", style="green")
2340
+
table.add_row("Total Users", str(len(users)))
2341
+
table.add_row("Total Entries", str(total_entries))
2342
+
table.add_row("Total References", str(total_references))
2343
+
table.add_row("Outbound Refs", str(len(ref_index.outbound_refs)))
2344
+
table.add_row("Inbound Refs", str(len(ref_index.inbound_refs)))
2345
+
table.add_row("Output File", str(output_path))
2347
+
console.print(table)
2349
+
# Show some interesting statistics
2350
+
if total_references > 0:
2351
+
if not get_tsv_mode():
2352
+
console.print("\n[bold]Reference Statistics:[/bold]")
2354
+
# Most referenced users
2355
+
target_counts = {}
2356
+
unresolved_domains = set()
2358
+
for ref in ref_index.references:
2359
+
if ref.target_username:
2360
+
target_counts[ref.target_username] = target_counts.get(ref.target_username, 0) + 1
2362
+
# Track unresolved domains
2363
+
from urllib.parse import urlparse
2364
+
domain = urlparse(ref.target_url).netloc.lower()
2365
+
unresolved_domains.add(domain)
2368
+
if get_tsv_mode():
2369
+
print("Referenced User\tReference Count")
2370
+
for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
2371
+
print(f"{username}\t{count}")
2373
+
console.print("\nMost referenced users:")
2374
+
for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
2375
+
console.print(f" {username}: {count} references")
2377
+
if unresolved_domains and verbose:
2378
+
if get_tsv_mode():
2379
+
print("Unresolved Domain\tCount")
2380
+
for domain in sorted(list(unresolved_domains)[:10]):
2381
+
print(f"{domain}\t1")
2382
+
if len(unresolved_domains) > 10:
2383
+
print(f"... and {len(unresolved_domains) - 10} more\t...")
2385
+
console.print(f"\nUnresolved domains: {len(unresolved_domains)}")
2386
+
for domain in sorted(list(unresolved_domains)[:10]):
2387
+
console.print(f" {domain}")
2388
+
if len(unresolved_domains) > 10:
2389
+
console.print(f" ... and {len(unresolved_domains) - 10} more")
2391
+
except Exception as e:
2392
+
console.print(f"[red]Error building reference index: {e}[/red]")
2394
+
console.print_exception()
2395
+
raise typer.Exit(1)
2400
+
config_file: Optional[Path] = typer.Option(
2404
+
help="Path to configuration file",
2406
+
index_file: Optional[Path] = typer.Option(
2410
+
help="Path to reference index file (default: links.json in git store)",
2412
+
username: Optional[str] = typer.Option(
2416
+
help="Show threads for specific username only",
2418
+
entry_id: Optional[str] = typer.Option(
2422
+
help="Show thread for specific entry ID",
2424
+
min_size: int = typer.Option(
2428
+
help="Minimum thread size to display",
2431
+
"""Show threaded view of related blog entries.
2433
+
This command uses the reference index to show which blog entries
2434
+
are connected through cross-references, creating an email-style
2435
+
threaded view of the conversation.
2437
+
Reads reference data from the unified links.json file.
2440
+
# Load configuration
2441
+
config = load_config(config_file)
2443
+
# Determine index file path
2445
+
index_path = index_file
2447
+
index_path = config.git_store / "links.json"
2449
+
if not index_path.exists():
2450
+
console.print(f"[red]Links file not found: {index_path}[/red]")
2451
+
console.print("Run 'thicket links' and 'thicket index' first to build the reference index")
2452
+
raise typer.Exit(1)
2454
+
# Load unified data
2455
+
with open(index_path) as f:
2456
+
unified_data = json.load(f)
2458
+
# Check if references exist in the unified structure
2459
+
if "references" not in unified_data:
2460
+
console.print(f"[red]No references found in {index_path}[/red]")
2461
+
console.print("Run 'thicket index' first to build the reference index")
2462
+
raise typer.Exit(1)
2464
+
# Extract reference data and reconstruct ReferenceIndex
2465
+
ref_index = ReferenceIndex.from_dict({
2466
+
"references": unified_data["references"],
2467
+
"user_domains": unified_data.get("user_domains", {})
2470
+
# Initialize Git store to get entry details
2471
+
git_store = GitStore(config.git_store)
2473
+
if entry_id and username:
2474
+
# Show specific thread
2475
+
thread_members = ref_index.get_thread_members(username, entry_id)
2476
+
_display_thread(thread_members, ref_index, git_store, f"Thread for {username}:{entry_id}")
2479
+
# Show all threads involving this user
2480
+
user_index = git_store._load_index()
2481
+
user = user_index.get_user(username)
2483
+
console.print(f"[red]User not found: {username}[/red]")
2484
+
raise typer.Exit(1)
2486
+
entries = git_store.list_entries(username)
2487
+
threads_found = set()
2489
+
console.print(f"[bold]Threads involving {username}:[/bold]\n")
2491
+
for entry in entries:
2492
+
thread_members = ref_index.get_thread_members(username, entry.id)
2493
+
if len(thread_members) >= min_size:
2494
+
thread_key = tuple(sorted(thread_members))
2495
+
if thread_key not in threads_found:
2496
+
threads_found.add(thread_key)
2497
+
_display_thread(thread_members, ref_index, git_store, f"Thread #{len(threads_found)}")
2500
+
# Show all threads
2501
+
console.print("[bold]All conversation threads:[/bold]\n")
2503
+
all_threads = set()
2504
+
processed_entries = set()
2507
+
user_index = git_store._load_index()
2508
+
for username in user_index.users.keys():
2509
+
entries = git_store.list_entries(username)
2510
+
for entry in entries:
2511
+
entry_key = (username, entry.id)
2512
+
if entry_key in processed_entries:
2515
+
thread_members = ref_index.get_thread_members(username, entry.id)
2516
+
if len(thread_members) >= min_size:
2517
+
thread_key = tuple(sorted(thread_members))
2518
+
if thread_key not in all_threads:
2519
+
all_threads.add(thread_key)
2520
+
_display_thread(thread_members, ref_index, git_store, f"Thread #{len(all_threads)}")
2522
+
# Mark all members as processed
2523
+
for member in thread_members:
2524
+
processed_entries.add(member)
2526
+
if not all_threads:
2527
+
console.print("[yellow]No conversation threads found[/yellow]")
2528
+
console.print(f"(minimum thread size: {min_size})")
2530
+
except Exception as e:
2531
+
console.print(f"[red]Error showing threads: {e}[/red]")
2532
+
raise typer.Exit(1)
2535
+
def _display_thread(thread_members, ref_index, git_store, title):
2536
+
"""Display a single conversation thread."""
2537
+
console.print(f"[bold cyan]{title}[/bold cyan]")
2538
+
console.print(f"Thread size: {len(thread_members)} entries")
2540
+
# Get entry details for each member
2541
+
thread_entries = []
2542
+
for username, entry_id in thread_members:
2543
+
entry = git_store.get_entry(username, entry_id)
2545
+
thread_entries.append((username, entry))
2547
+
# Sort by publication date
2548
+
thread_entries.sort(key=lambda x: x[1].published or x[1].updated)
2551
+
for i, (username, entry) in enumerate(thread_entries):
2552
+
prefix = "โโ" if i < len(thread_entries) - 1 else "โโ"
2554
+
# Get references for this entry
2555
+
outbound = ref_index.get_outbound_refs(username, entry.id)
2556
+
inbound = ref_index.get_inbound_refs(username, entry.id)
2559
+
if outbound or inbound:
2560
+
ref_info = f" ({len(outbound)} out, {len(inbound)} in)"
2562
+
console.print(f" {prefix} [{username}] {entry.title[:60]}...{ref_info}")
2564
+
if entry.published:
2565
+
console.print(f" Published: {entry.published.strftime('%Y-%m-%d')}")
2567
+
console.print() # Empty line after each thread
2570
+
<file path="src/thicket/cli/commands/info_cmd.py">
2571
+
"""CLI command for displaying detailed information about a specific atom entry."""
2574
+
from pathlib import Path
2575
+
from typing import Optional
2578
+
from rich.console import Console
2579
+
from rich.panel import Panel
2580
+
from rich.table import Table
2581
+
from rich.text import Text
2583
+
from ...core.git_store import GitStore
2584
+
from ...core.reference_parser import ReferenceIndex
2585
+
from ..main import app
2586
+
from ..utils import load_config, get_tsv_mode
2588
+
console = Console()
2593
+
identifier: str = typer.Argument(
2595
+
help="The atom ID or URL of the entry to display information about"
2597
+
username: Optional[str] = typer.Option(
2601
+
help="Username to search for the entry (if not provided, searches all users)"
2603
+
config_file: Optional[Path] = typer.Option(
2604
+
Path("thicket.yaml"),
2607
+
help="Path to configuration file",
2609
+
show_content: bool = typer.Option(
2612
+
help="Include the full content of the entry in the output"
2615
+
"""Display detailed information about a specific atom entry.
2617
+
You can specify the entry using either its atom ID or URL.
2618
+
Shows all metadata for the given entry, including title, dates, categories,
2619
+
and summarizes all inbound and outbound links to/from other posts.
2622
+
# Load configuration
2623
+
config = load_config(config_file)
2625
+
# Initialize Git store
2626
+
git_store = GitStore(config.git_store)
2630
+
found_username = None
2632
+
# Check if identifier looks like a URL
2633
+
is_url = identifier.startswith(('http://', 'https://'))
2636
+
# Search specific username
2639
+
entries = git_store.list_entries(username)
2641
+
if str(e.link) == identifier:
2643
+
found_username = username
2646
+
# Search by atom ID
2647
+
entry = git_store.get_entry(username, identifier)
2649
+
found_username = username
2651
+
# Search all users
2652
+
index = git_store._load_index()
2653
+
for user in index.users.keys():
2656
+
entries = git_store.list_entries(user)
2658
+
if str(e.link) == identifier:
2660
+
found_username = user
2665
+
# Search by atom ID
2666
+
entry = git_store.get_entry(user, identifier)
2668
+
found_username = user
2671
+
if not entry or not found_username:
2673
+
console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found for user '{username}'[/red]")
2675
+
console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found in any user's entries[/red]")
2676
+
raise typer.Exit(1)
2678
+
# Load reference index if available
2679
+
links_path = config.git_store / "links.json"
2681
+
if links_path.exists():
2682
+
with open(links_path) as f:
2683
+
unified_data = json.load(f)
2685
+
# Check if references exist in the unified structure
2686
+
if "references" in unified_data:
2687
+
ref_index = ReferenceIndex.from_dict({
2688
+
"references": unified_data["references"],
2689
+
"user_domains": unified_data.get("user_domains", {})
2692
+
# Display information
2693
+
if get_tsv_mode():
2694
+
_display_entry_info_tsv(entry, found_username, ref_index, show_content)
2696
+
_display_entry_info(entry, found_username)
2699
+
_display_link_info(entry, found_username, ref_index)
2701
+
console.print("\n[yellow]No reference index found. Run 'thicket links' and 'thicket index' to build cross-reference data.[/yellow]")
2703
+
# Optionally display content
2704
+
if show_content and entry.content:
2705
+
_display_content(entry.content)
2707
+
except Exception as e:
2708
+
console.print(f"[red]Error displaying entry info: {e}[/red]")
2709
+
raise typer.Exit(1)
2712
+
def _display_entry_info(entry, username: str) -> None:
2713
+
"""Display basic entry information in a structured format."""
2715
+
# Create main info panel
2716
+
info_table = Table.grid(padding=(0, 2))
2717
+
info_table.add_column("Field", style="cyan bold", width=15)
2718
+
info_table.add_column("Value", style="white")
2720
+
info_table.add_row("User", f"[green]{username}[/green]")
2721
+
info_table.add_row("Atom ID", f"[blue]{entry.id}[/blue]")
2722
+
info_table.add_row("Title", entry.title)
2723
+
info_table.add_row("Link", str(entry.link))
2725
+
if entry.published:
2726
+
info_table.add_row("Published", entry.published.strftime("%Y-%m-%d %H:%M:%S UTC"))
2728
+
info_table.add_row("Updated", entry.updated.strftime("%Y-%m-%d %H:%M:%S UTC"))
2731
+
# Truncate long summaries
2732
+
summary = entry.summary[:200] + "..." if len(entry.summary) > 200 else entry.summary
2733
+
info_table.add_row("Summary", summary)
2735
+
if entry.categories:
2736
+
categories_text = ", ".join(entry.categories)
2737
+
info_table.add_row("Categories", categories_text)
2741
+
if "name" in entry.author:
2742
+
author_info.append(entry.author["name"])
2743
+
if "email" in entry.author:
2744
+
author_info.append(f"<{entry.author['email']}>")
2746
+
info_table.add_row("Author", " ".join(author_info))
2748
+
if entry.content_type:
2749
+
info_table.add_row("Content Type", entry.content_type)
2752
+
info_table.add_row("Rights", entry.rights)
2755
+
info_table.add_row("Source Feed", entry.source)
2759
+
title=f"[bold]Entry Information[/bold]",
2760
+
border_style="blue"
2763
+
console.print(panel)
2766
+
def _display_link_info(entry, username: str, ref_index: ReferenceIndex) -> None:
2767
+
"""Display inbound and outbound link information."""
2770
+
outbound_refs = ref_index.get_outbound_refs(username, entry.id)
2771
+
inbound_refs = ref_index.get_inbound_refs(username, entry.id)
2773
+
if not outbound_refs and not inbound_refs:
2774
+
console.print("\n[dim]No cross-references found for this entry.[/dim]")
2777
+
# Create links table
2778
+
links_table = Table(title="Cross-References")
2779
+
links_table.add_column("Direction", style="cyan", width=10)
2780
+
links_table.add_column("Target/Source", style="green", width=20)
2781
+
links_table.add_column("URL", style="blue", width=50)
2783
+
# Add outbound references
2784
+
for ref in outbound_refs:
2785
+
target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
2786
+
links_table.add_row("โ Out", target_info, ref.target_url)
2788
+
# Add inbound references
2789
+
for ref in inbound_refs:
2790
+
source_info = f"{ref.source_username}:{ref.source_entry_id}"
2791
+
links_table.add_row("โ In", source_info, ref.target_url)
2794
+
console.print(links_table)
2797
+
console.print(f"\n[bold]Summary:[/bold] {len(outbound_refs)} outbound, {len(inbound_refs)} inbound references")
2800
+
def _display_content(content: str) -> None:
2801
+
"""Display the full content of the entry."""
2803
+
# Truncate very long content
2804
+
display_content = content
2805
+
if len(content) > 5000:
2806
+
display_content = content[:5000] + "\n\n[... content truncated ...]"
2810
+
title="[bold]Entry Content[/bold]",
2811
+
border_style="green",
2816
+
console.print(panel)
2819
+
def _display_entry_info_tsv(entry, username: str, ref_index: Optional[ReferenceIndex], show_content: bool) -> None:
2820
+
"""Display entry information in TSV format."""
2823
+
print("Field\tValue")
2824
+
print(f"User\t{username}")
2825
+
print(f"Atom ID\t{entry.id}")
2826
+
print(f"Title\t{entry.title.replace(chr(9), ' ').replace(chr(10), ' ').replace(chr(13), ' ')}")
2827
+
print(f"Link\t{entry.link}")
2829
+
if entry.published:
2830
+
print(f"Published\t{entry.published.strftime('%Y-%m-%d %H:%M:%S UTC')}")
2832
+
print(f"Updated\t{entry.updated.strftime('%Y-%m-%d %H:%M:%S UTC')}")
2835
+
# Escape tabs and newlines in summary
2836
+
summary = entry.summary.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
2837
+
print(f"Summary\t{summary}")
2839
+
if entry.categories:
2840
+
print(f"Categories\t{', '.join(entry.categories)}")
2844
+
if "name" in entry.author:
2845
+
author_info.append(entry.author["name"])
2846
+
if "email" in entry.author:
2847
+
author_info.append(f"<{entry.author['email']}>")
2849
+
print(f"Author\t{' '.join(author_info)}")
2851
+
if entry.content_type:
2852
+
print(f"Content Type\t{entry.content_type}")
2855
+
print(f"Rights\t{entry.rights}")
2858
+
print(f"Source Feed\t{entry.source}")
2860
+
# Add reference info if available
2862
+
outbound_refs = ref_index.get_outbound_refs(username, entry.id)
2863
+
inbound_refs = ref_index.get_inbound_refs(username, entry.id)
2865
+
print(f"Outbound References\t{len(outbound_refs)}")
2866
+
print(f"Inbound References\t{len(inbound_refs)}")
2868
+
# Show each reference
2869
+
for ref in outbound_refs:
2870
+
target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
2871
+
print(f"Outbound Reference\t{target_info}\t{ref.target_url}")
2873
+
for ref in inbound_refs:
2874
+
source_info = f"{ref.source_username}:{ref.source_entry_id}"
2875
+
print(f"Inbound Reference\t{source_info}\t{ref.target_url}")
2877
+
# Show content if requested
2878
+
if show_content and entry.content:
2879
+
# Escape tabs and newlines in content
2880
+
content = entry.content.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
2881
+
print(f"Content\t{content}")
2884
+
<file path="src/thicket/cli/commands/init.py">
2885
+
"""Initialize command for thicket."""
2887
+
from pathlib import Path
2888
+
from typing import Optional
2891
+
from pydantic import ValidationError
2893
+
from ...core.git_store import GitStore
2894
+
from ...models import ThicketConfig
2895
+
from ..main import app
2896
+
from ..utils import print_error, print_success, save_config
2901
+
git_store: Path = typer.Argument(..., help="Path to Git repository for storing feeds"),
2902
+
cache_dir: Optional[Path] = typer.Option(
2903
+
None, "--cache-dir", "-c", help="Cache directory (default: ~/.cache/thicket)"
2905
+
config_file: Optional[Path] = typer.Option(
2906
+
None, "--config", help="Configuration file path (default: thicket.yaml)"
2908
+
force: bool = typer.Option(
2909
+
False, "--force", "-f", help="Overwrite existing configuration"
2912
+
"""Initialize a new thicket configuration and Git store."""
2914
+
# Set default paths
2915
+
if cache_dir is None:
2916
+
from platformdirs import user_cache_dir
2917
+
cache_dir = Path(user_cache_dir("thicket"))
2919
+
if config_file is None:
2920
+
config_file = Path("thicket.yaml")
2922
+
# Check if config already exists
2923
+
if config_file.exists() and not force:
2924
+
print_error(f"Configuration file already exists: {config_file}")
2925
+
print_error("Use --force to overwrite")
2926
+
raise typer.Exit(1)
2928
+
# Create cache directory
2929
+
cache_dir.mkdir(parents=True, exist_ok=True)
2931
+
# Create Git store
2933
+
GitStore(git_store)
2934
+
print_success(f"Initialized Git store at: {git_store}")
2935
+
except Exception as e:
2936
+
print_error(f"Failed to initialize Git store: {e}")
2937
+
raise typer.Exit(1) from e
2939
+
# Create configuration
2941
+
config = ThicketConfig(
2942
+
git_store=git_store,
2943
+
cache_dir=cache_dir,
2947
+
save_config(config, config_file)
2948
+
print_success(f"Created configuration file: {config_file}")
2950
+
except ValidationError as e:
2951
+
print_error(f"Invalid configuration: {e}")
2952
+
raise typer.Exit(1) from e
2953
+
except Exception as e:
2954
+
print_error(f"Failed to create configuration: {e}")
2955
+
raise typer.Exit(1) from e
2957
+
print_success("Thicket initialized successfully!")
2958
+
print_success(f"Git store: {git_store}")
2959
+
print_success(f"Cache directory: {cache_dir}")
2960
+
print_success(f"Configuration: {config_file}")
2961
+
print_success("Run 'thicket add user' to add your first user and feed.")
2964
+
<file path="src/thicket/cli/__init__.py">
2965
+
"""CLI interface for thicket."""
2967
+
from .main import app
2972
+
<file path="src/thicket/core/__init__.py">
2973
+
"""Core business logic for thicket."""
2975
+
from .feed_parser import FeedParser
2976
+
from .git_store import GitStore
2978
+
__all__ = ["FeedParser", "GitStore"]
2981
+
<file path="src/thicket/core/feed_parser.py">
2982
+
"""Feed parsing and normalization with auto-discovery."""
2984
+
from datetime import datetime
2985
+
from typing import Optional
2986
+
from urllib.parse import urlparse
2991
+
from pydantic import HttpUrl, ValidationError
2993
+
from ..models import AtomEntry, FeedMetadata
2997
+
"""Parser for RSS/Atom feeds with normalization and auto-discovery."""
2999
+
def __init__(self, user_agent: str = "thicket/0.1.0"):
3000
+
"""Initialize the feed parser."""
3001
+
self.user_agent = user_agent
3002
+
self.allowed_tags = [
3003
+
"a", "abbr", "acronym", "b", "blockquote", "br", "code", "em",
3004
+
"i", "li", "ol", "p", "pre", "strong", "ul", "h1", "h2", "h3",
3005
+
"h4", "h5", "h6", "img", "div", "span",
3007
+
self.allowed_attributes = {
3008
+
"a": ["href", "title"],
3009
+
"abbr": ["title"],
3010
+
"acronym": ["title"],
3011
+
"img": ["src", "alt", "title", "width", "height"],
3012
+
"blockquote": ["cite"],
3015
+
async def fetch_feed(self, url: HttpUrl) -> str:
3016
+
"""Fetch feed content from URL."""
3017
+
async with httpx.AsyncClient() as client:
3018
+
response = await client.get(
3020
+
headers={"User-Agent": self.user_agent},
3022
+
follow_redirects=True,
3024
+
response.raise_for_status()
3025
+
return response.text
3027
+
def parse_feed(self, content: str, source_url: Optional[HttpUrl] = None) -> tuple[FeedMetadata, list[AtomEntry]]:
3028
+
"""Parse feed content and return metadata and entries."""
3029
+
parsed = feedparser.parse(content)
3031
+
if parsed.bozo and parsed.bozo_exception:
3032
+
# Try to continue with potentially malformed feed
3035
+
# Extract feed metadata
3036
+
feed_meta = self._extract_feed_metadata(parsed.feed)
3038
+
# Extract and normalize entries
3040
+
for entry in parsed.entries:
3042
+
atom_entry = self._normalize_entry(entry, source_url)
3043
+
entries.append(atom_entry)
3044
+
except Exception as e:
3045
+
# Log error but continue processing other entries
3046
+
print(f"Error processing entry {getattr(entry, 'id', 'unknown')}: {e}")
3049
+
return feed_meta, entries
3051
+
def _extract_feed_metadata(self, feed: feedparser.FeedParserDict) -> FeedMetadata:
3052
+
"""Extract metadata from feed for auto-discovery."""
3053
+
# Parse author information
3054
+
author_name = None
3055
+
author_email = None
3058
+
if hasattr(feed, 'author_detail'):
3059
+
author_name = feed.author_detail.get('name')
3060
+
author_email = feed.author_detail.get('email')
3061
+
author_uri = feed.author_detail.get('href')
3062
+
elif hasattr(feed, 'author'):
3063
+
author_name = feed.author
3065
+
# Parse managing editor for RSS feeds
3066
+
if not author_email and hasattr(feed, 'managingEditor'):
3067
+
author_email = feed.managingEditor
3071
+
if hasattr(feed, 'link'):
3073
+
feed_link = HttpUrl(feed.link)
3074
+
except ValidationError:
3077
+
# Parse image/icon/logo
3082
+
if hasattr(feed, 'image'):
3084
+
image_url = HttpUrl(feed.image.get('href', feed.image.get('url', '')))
3085
+
except (ValidationError, AttributeError):
3088
+
if hasattr(feed, 'icon'):
3090
+
icon = HttpUrl(feed.icon)
3091
+
except ValidationError:
3094
+
if hasattr(feed, 'logo'):
3096
+
logo = HttpUrl(feed.logo)
3097
+
except ValidationError:
3100
+
return FeedMetadata(
3101
+
title=getattr(feed, 'title', None),
3102
+
author_name=author_name,
3103
+
author_email=author_email,
3104
+
author_uri=HttpUrl(author_uri) if author_uri else None,
3108
+
image_url=image_url,
3109
+
description=getattr(feed, 'description', None),
3112
+
def _normalize_entry(self, entry: feedparser.FeedParserDict, source_url: Optional[HttpUrl] = None) -> AtomEntry:
3113
+
"""Normalize an entry to Atom format."""
3114
+
# Parse timestamps
3115
+
updated = self._parse_timestamp(entry.get('updated_parsed') or entry.get('published_parsed'))
3116
+
published = self._parse_timestamp(entry.get('published_parsed'))
3119
+
content = self._extract_content(entry)
3120
+
content_type = self._extract_content_type(entry)
3123
+
author = self._extract_author(entry)
3125
+
# Parse categories/tags
3127
+
if hasattr(entry, 'tags'):
3128
+
categories = [tag.get('term', '') for tag in entry.tags if tag.get('term')]
3130
+
# Sanitize HTML content
3132
+
content = self._sanitize_html(content)
3134
+
summary = entry.get('summary', '')
3136
+
summary = self._sanitize_html(summary)
3139
+
id=entry.get('id', entry.get('link', '')),
3140
+
title=entry.get('title', ''),
3141
+
link=HttpUrl(entry.get('link', '')),
3143
+
published=published,
3144
+
summary=summary or None,
3145
+
content=content or None,
3146
+
content_type=content_type,
3148
+
categories=categories,
3149
+
rights=entry.get('rights', None),
3150
+
source=str(source_url) if source_url else None,
3153
+
def _parse_timestamp(self, time_struct) -> datetime:
3154
+
"""Parse feedparser time struct to datetime."""
3156
+
return datetime(*time_struct[:6])
3157
+
return datetime.now()
3159
+
def _extract_content(self, entry: feedparser.FeedParserDict) -> Optional[str]:
3160
+
"""Extract the best content from an entry."""
3161
+
# Prefer content over summary
3162
+
if hasattr(entry, 'content') and entry.content:
3163
+
# Find the best content (prefer text/html, then text/plain)
3164
+
for content_item in entry.content:
3165
+
if content_item.get('type') in ['text/html', 'html']:
3166
+
return content_item.get('value', '')
3167
+
elif content_item.get('type') in ['text/plain', 'text']:
3168
+
return content_item.get('value', '')
3169
+
# Fallback to first content item
3170
+
return entry.content[0].get('value', '')
3172
+
# Fallback to summary
3173
+
return entry.get('summary', '')
3175
+
def _extract_content_type(self, entry: feedparser.FeedParserDict) -> str:
3176
+
"""Extract content type from entry."""
3177
+
if hasattr(entry, 'content') and entry.content:
3178
+
content_type = entry.content[0].get('type', 'html')
3179
+
# Normalize content type
3180
+
if content_type in ['text/html', 'html']:
3182
+
elif content_type in ['text/plain', 'text']:
3184
+
elif content_type == 'xhtml':
3188
+
def _extract_author(self, entry: feedparser.FeedParserDict) -> Optional[dict]:
3189
+
"""Extract author information from entry."""
3192
+
if hasattr(entry, 'author_detail'):
3194
+
'name': entry.author_detail.get('name'),
3195
+
'email': entry.author_detail.get('email'),
3196
+
'uri': entry.author_detail.get('href'),
3198
+
elif hasattr(entry, 'author'):
3199
+
author['name'] = entry.author
3201
+
return author if author else None
3203
+
def _sanitize_html(self, html: str) -> str:
3204
+
"""Sanitize HTML content to prevent XSS."""
3205
+
return bleach.clean(
3207
+
tags=self.allowed_tags,
3208
+
attributes=self.allowed_attributes,
3212
+
def sanitize_entry_id(self, entry_id: str) -> str:
3213
+
"""Sanitize entry ID to be a safe filename."""
3214
+
# Parse URL to get meaningful parts
3215
+
parsed = urlparse(entry_id)
3217
+
# Start with the path component
3219
+
# Remove leading slash and replace problematic characters
3220
+
safe_id = parsed.path.lstrip('/').replace('/', '_').replace('\\', '_')
3222
+
# Use the entire ID as fallback
3223
+
safe_id = entry_id
3225
+
# Replace problematic characters
3227
+
for char in safe_id:
3228
+
if char.isalnum() or char in '-_.':
3229
+
safe_chars.append(char)
3231
+
safe_chars.append('_')
3233
+
safe_id = ''.join(safe_chars)
3235
+
# Ensure it's not too long (max 200 chars)
3236
+
if len(safe_id) > 200:
3237
+
safe_id = safe_id[:200]
3239
+
# Ensure it's not empty
3246
+
<file path="src/thicket/core/reference_parser.py">
3247
+
"""Reference detection and parsing for blog entries."""
3250
+
from typing import Optional
3251
+
from urllib.parse import urlparse
3253
+
from ..models import AtomEntry
3256
+
class BlogReference:
3257
+
"""Represents a reference from one blog entry to another."""
3261
+
source_entry_id: str,
3262
+
source_username: str,
3264
+
target_username: Optional[str] = None,
3265
+
target_entry_id: Optional[str] = None,
3267
+
self.source_entry_id = source_entry_id
3268
+
self.source_username = source_username
3269
+
self.target_url = target_url
3270
+
self.target_username = target_username
3271
+
self.target_entry_id = target_entry_id
3273
+
def to_dict(self) -> dict:
3274
+
"""Convert to dictionary for JSON serialization."""
3276
+
"source_entry_id": self.source_entry_id,
3277
+
"source_username": self.source_username,
3278
+
"target_url": self.target_url,
3281
+
# Only include optional fields if they are not None
3282
+
if self.target_username is not None:
3283
+
result["target_username"] = self.target_username
3284
+
if self.target_entry_id is not None:
3285
+
result["target_entry_id"] = self.target_entry_id
3290
+
def from_dict(cls, data: dict) -> "BlogReference":
3291
+
"""Create from dictionary."""
3293
+
source_entry_id=data["source_entry_id"],
3294
+
source_username=data["source_username"],
3295
+
target_url=data["target_url"],
3296
+
target_username=data.get("target_username"),
3297
+
target_entry_id=data.get("target_entry_id"),
3301
+
class ReferenceIndex:
3302
+
"""Index of blog-to-blog references for creating threaded views."""
3304
+
def __init__(self):
3305
+
self.references: list[BlogReference] = []
3306
+
self.outbound_refs: dict[
3307
+
str, list[BlogReference]
3308
+
] = {} # entry_id -> outbound refs
3309
+
self.inbound_refs: dict[
3310
+
str, list[BlogReference]
3311
+
] = {} # entry_id -> inbound refs
3312
+
self.user_domains: dict[str, set[str]] = {} # username -> set of domains
3314
+
def add_reference(self, ref: BlogReference) -> None:
3315
+
"""Add a reference to the index."""
3316
+
self.references.append(ref)
3318
+
# Update outbound references
3319
+
source_key = f"{ref.source_username}:{ref.source_entry_id}"
3320
+
if source_key not in self.outbound_refs:
3321
+
self.outbound_refs[source_key] = []
3322
+
self.outbound_refs[source_key].append(ref)
3324
+
# Update inbound references if we can identify the target
3325
+
if ref.target_username and ref.target_entry_id:
3326
+
target_key = f"{ref.target_username}:{ref.target_entry_id}"
3327
+
if target_key not in self.inbound_refs:
3328
+
self.inbound_refs[target_key] = []
3329
+
self.inbound_refs[target_key].append(ref)
3331
+
def get_outbound_refs(self, username: str, entry_id: str) -> list[BlogReference]:
3332
+
"""Get all outbound references from an entry."""
3333
+
key = f"{username}:{entry_id}"
3334
+
return self.outbound_refs.get(key, [])
3336
+
def get_inbound_refs(self, username: str, entry_id: str) -> list[BlogReference]:
3337
+
"""Get all inbound references to an entry."""
3338
+
key = f"{username}:{entry_id}"
3339
+
return self.inbound_refs.get(key, [])
3341
+
def get_thread_members(self, username: str, entry_id: str) -> set[tuple[str, str]]:
3342
+
"""Get all entries that are part of the same thread."""
3344
+
to_visit = [(username, entry_id)]
3345
+
thread_members = set()
3348
+
current_user, current_entry = to_visit.pop()
3349
+
if (current_user, current_entry) in visited:
3352
+
visited.add((current_user, current_entry))
3353
+
thread_members.add((current_user, current_entry))
3355
+
# Add outbound references
3356
+
for ref in self.get_outbound_refs(current_user, current_entry):
3357
+
if ref.target_username and ref.target_entry_id:
3358
+
to_visit.append((ref.target_username, ref.target_entry_id))
3360
+
# Add inbound references
3361
+
for ref in self.get_inbound_refs(current_user, current_entry):
3362
+
to_visit.append((ref.source_username, ref.source_entry_id))
3364
+
return thread_members
3366
+
def to_dict(self) -> dict:
3367
+
"""Convert to dictionary for JSON serialization."""
3369
+
"references": [ref.to_dict() for ref in self.references],
3370
+
"user_domains": {k: list(v) for k, v in self.user_domains.items()},
3374
+
def from_dict(cls, data: dict) -> "ReferenceIndex":
3375
+
"""Create from dictionary."""
3377
+
for ref_data in data.get("references", []):
3378
+
ref = BlogReference.from_dict(ref_data)
3379
+
index.add_reference(ref)
3381
+
for username, domains in data.get("user_domains", {}).items():
3382
+
index.user_domains[username] = set(domains)
3387
+
class ReferenceParser:
3388
+
"""Parses blog entries to detect references to other blogs."""
3390
+
def __init__(self):
3391
+
# Common blog platforms and patterns
3392
+
self.blog_patterns = [
3393
+
r"https?://[^/]+\.(?:org|com|net|io|dev|me|co\.uk)/.*", # Common blog domains
3394
+
r"https?://[^/]+\.github\.io/.*", # GitHub Pages
3395
+
r"https?://[^/]+\.substack\.com/.*", # Substack
3396
+
r"https?://medium\.com/.*", # Medium
3397
+
r"https?://[^/]+\.wordpress\.com/.*", # WordPress.com
3398
+
r"https?://[^/]+\.blogspot\.com/.*", # Blogger
3401
+
# Compile regex patterns
3402
+
self.link_pattern = re.compile(
3403
+
r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL
3405
+
self.url_pattern = re.compile(r'https?://[^\s<>"]+')
3407
+
def extract_links_from_html(self, html_content: str) -> list[tuple[str, str]]:
3408
+
"""Extract all links from HTML content."""
3411
+
# Extract links from <a> tags
3412
+
for match in self.link_pattern.finditer(html_content):
3413
+
url = match.group(1)
3415
+
r"<[^>]+>", "", match.group(2)
3416
+
).strip() # Remove HTML tags from link text
3417
+
links.append((url, text))
3421
+
def is_blog_url(self, url: str) -> bool:
3422
+
"""Check if a URL likely points to a blog post."""
3423
+
for pattern in self.blog_patterns:
3424
+
if re.match(pattern, url):
3428
+
def _is_likely_blog_post_url(self, url: str) -> bool:
3429
+
"""Check if a same-domain URL likely points to a blog post (not CSS, images, etc.)."""
3430
+
parsed_url = urlparse(url)
3431
+
path = parsed_url.path.lower()
3433
+
# Skip obvious non-blog content
3434
+
if any(path.endswith(ext) for ext in ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.pdf', '.xml', '.json']):
3437
+
# Skip common non-blog paths
3438
+
if any(segment in path for segment in ['/static/', '/assets/', '/css/', '/js/', '/images/', '/img/', '/media/', '/uploads/']):
3441
+
# Skip fragment-only links (same page anchors)
3442
+
if not path or path == '/':
3445
+
# Look for positive indicators of blog posts
3446
+
# Common blog post patterns: dates, slugs, post indicators
3447
+
blog_indicators = [
3448
+
r'/\d{4}/', # Year in path
3449
+
r'/\d{4}/\d{2}/', # Year/month in path
3459
+
for pattern in blog_indicators:
3460
+
if re.search(pattern, path):
3463
+
# If it has a reasonable path depth and doesn't match exclusions, likely a blog post
3464
+
path_segments = [seg for seg in path.split('/') if seg]
3465
+
return len(path_segments) >= 1 # At least one meaningful path segment
3467
+
def resolve_target_user(
3468
+
self, url: str, user_domains: dict[str, set[str]]
3469
+
) -> Optional[str]:
3470
+
"""Try to resolve a URL to a known user based on domain mapping."""
3471
+
parsed_url = urlparse(url)
3472
+
domain = parsed_url.netloc.lower()
3474
+
for username, domains in user_domains.items():
3475
+
if domain in domains:
3480
+
def extract_references(
3481
+
self, entry: AtomEntry, username: str, user_domains: dict[str, set[str]]
3482
+
) -> list[BlogReference]:
3483
+
"""Extract all blog references from an entry."""
3486
+
# Combine all text content for analysis
3487
+
content_to_search = []
3489
+
content_to_search.append(entry.content)
3491
+
content_to_search.append(entry.summary)
3493
+
for content in content_to_search:
3494
+
links = self.extract_links_from_html(content)
3496
+
for url, _link_text in links:
3498
+
urlparse(str(entry.link)).netloc.lower() if entry.link else ""
3500
+
link_domain = urlparse(url).netloc.lower()
3502
+
# Check if this looks like a blog URL
3503
+
if not self.is_blog_url(url):
3506
+
# For same-domain links, apply additional filtering to avoid non-blog content
3507
+
if link_domain == entry_domain:
3508
+
# Only include same-domain links that look like blog posts
3509
+
if not self._is_likely_blog_post_url(url):
3512
+
# Try to resolve to a known user
3513
+
if link_domain == entry_domain:
3514
+
# Same domain - target user is the same as source user
3515
+
target_username: Optional[str] = username
3517
+
# Different domain - try to resolve
3518
+
target_username = self.resolve_target_user(url, user_domains)
3520
+
ref = BlogReference(
3521
+
source_entry_id=entry.id,
3522
+
source_username=username,
3524
+
target_username=target_username,
3525
+
target_entry_id=None, # Will be resolved later if possible
3528
+
references.append(ref)
3532
+
def build_user_domain_mapping(self, git_store: "GitStore") -> dict[str, set[str]]:
3533
+
"""Build mapping of usernames to their known domains."""
3535
+
index = git_store._load_index()
3537
+
for username, user_metadata in index.users.items():
3540
+
# Add domains from feeds
3541
+
for feed_url in user_metadata.feeds:
3542
+
domain = urlparse(feed_url).netloc.lower()
3544
+
domains.add(domain)
3546
+
# Add domain from homepage
3547
+
if user_metadata.homepage:
3548
+
domain = urlparse(str(user_metadata.homepage)).netloc.lower()
3550
+
domains.add(domain)
3552
+
user_domains[username] = domains
3554
+
return user_domains
3556
+
def _build_url_to_entry_mapping(self, git_store: "GitStore") -> dict[str, str]:
3557
+
"""Build a comprehensive mapping from URLs to entry IDs using git store data.
3559
+
This creates a bidirectional mapping that handles:
3560
+
- Entry link URLs -> Entry IDs
3561
+
- URL variations (with/without www, http/https)
3562
+
- Multiple URLs pointing to the same entry
3564
+
url_to_entry: dict[str, str] = {}
3566
+
# Load index to get all users
3567
+
index = git_store._load_index()
3569
+
for username in index.users.keys():
3570
+
entries = git_store.list_entries(username)
3572
+
for entry in entries:
3574
+
link_url = str(entry.link)
3575
+
entry_id = entry.id
3577
+
# Map the canonical link URL
3578
+
url_to_entry[link_url] = entry_id
3580
+
# Handle common URL variations
3581
+
parsed = urlparse(link_url)
3582
+
if parsed.netloc and parsed.path:
3583
+
# Add version without www
3584
+
if parsed.netloc.startswith('www.'):
3585
+
no_www_url = f"{parsed.scheme}://{parsed.netloc[4:]}{parsed.path}"
3587
+
no_www_url += f"?{parsed.query}"
3588
+
if parsed.fragment:
3589
+
no_www_url += f"#{parsed.fragment}"
3590
+
url_to_entry[no_www_url] = entry_id
3592
+
# Add version with www if not present
3593
+
elif not parsed.netloc.startswith('www.'):
3594
+
www_url = f"{parsed.scheme}://www.{parsed.netloc}{parsed.path}"
3596
+
www_url += f"?{parsed.query}"
3597
+
if parsed.fragment:
3598
+
www_url += f"#{parsed.fragment}"
3599
+
url_to_entry[www_url] = entry_id
3601
+
# Add http/https variations
3602
+
if parsed.scheme == 'https':
3603
+
http_url = link_url.replace('https://', 'http://', 1)
3604
+
url_to_entry[http_url] = entry_id
3605
+
elif parsed.scheme == 'http':
3606
+
https_url = link_url.replace('http://', 'https://', 1)
3607
+
url_to_entry[https_url] = entry_id
3609
+
return url_to_entry
3611
+
def _normalize_url(self, url: str) -> str:
3612
+
"""Normalize URL for consistent matching.
3614
+
Handles common variations like trailing slashes, fragments, etc.
3616
+
parsed = urlparse(url)
3618
+
# Remove trailing slash from path
3619
+
path = parsed.path.rstrip('/') if parsed.path != '/' else parsed.path
3621
+
# Reconstruct without fragment for consistent matching
3622
+
normalized = f"{parsed.scheme}://{parsed.netloc}{path}"
3624
+
normalized += f"?{parsed.query}"
3628
+
def resolve_target_entry_ids(
3629
+
self, references: list[BlogReference], git_store: "GitStore"
3630
+
) -> list[BlogReference]:
3631
+
"""Resolve target_entry_id for references using comprehensive URL mapping."""
3632
+
resolved_refs = []
3634
+
# Build comprehensive URL to entry ID mapping
3635
+
url_to_entry = self._build_url_to_entry_mapping(git_store)
3637
+
for ref in references:
3638
+
# If we already have a target_entry_id, keep the reference as-is
3639
+
if ref.target_entry_id is not None:
3640
+
resolved_refs.append(ref)
3643
+
# If we don't have a target_username, we can't resolve it
3644
+
if ref.target_username is None:
3645
+
resolved_refs.append(ref)
3648
+
# Try to resolve using URL mapping
3649
+
resolved_entry_id = None
3651
+
# First, try exact match
3652
+
if ref.target_url in url_to_entry:
3653
+
resolved_entry_id = url_to_entry[ref.target_url]
3655
+
# Try normalized URL matching
3656
+
normalized_target = self._normalize_url(ref.target_url)
3657
+
if normalized_target in url_to_entry:
3658
+
resolved_entry_id = url_to_entry[normalized_target]
3660
+
# Try URL variations
3661
+
for mapped_url, entry_id in url_to_entry.items():
3662
+
if self._normalize_url(mapped_url) == normalized_target:
3663
+
resolved_entry_id = entry_id
3666
+
# Verify the resolved entry belongs to the target username
3667
+
if resolved_entry_id:
3668
+
# Double-check by loading the actual entry
3669
+
entries = git_store.list_entries(ref.target_username)
3670
+
entry_found = any(entry.id == resolved_entry_id for entry in entries)
3671
+
if not entry_found:
3672
+
resolved_entry_id = None
3674
+
# Create a new reference with the resolved target_entry_id
3675
+
resolved_ref = BlogReference(
3676
+
source_entry_id=ref.source_entry_id,
3677
+
source_username=ref.source_username,
3678
+
target_url=ref.target_url,
3679
+
target_username=ref.target_username,
3680
+
target_entry_id=resolved_entry_id,
3682
+
resolved_refs.append(resolved_ref)
3684
+
return resolved_refs
3687
+
<file path="src/thicket/models/__init__.py">
3688
+
"""Data models for thicket."""
3690
+
from .config import ThicketConfig, UserConfig
3691
+
from .feed import AtomEntry, DuplicateMap, FeedMetadata
3692
+
from .user import GitStoreIndex, UserMetadata
3705
+
<file path="src/thicket/models/feed.py">
3706
+
"""Feed and entry models for thicket."""
3708
+
from datetime import datetime
3709
+
from typing import TYPE_CHECKING, Optional
3711
+
from pydantic import BaseModel, ConfigDict, EmailStr, HttpUrl
3714
+
from .config import UserConfig
3717
+
class AtomEntry(BaseModel):
3718
+
"""Represents an Atom feed entry stored in the Git repository."""
3720
+
model_config = ConfigDict(
3721
+
json_encoders={datetime: lambda v: v.isoformat()},
3722
+
str_strip_whitespace=True,
3725
+
id: str # Original Atom ID
3729
+
published: Optional[datetime] = None
3730
+
summary: Optional[str] = None
3731
+
content: Optional[str] = None # Full body content from Atom entry
3732
+
content_type: Optional[str] = "html" # text, html, xhtml
3733
+
author: Optional[dict] = None
3734
+
categories: list[str] = []
3735
+
rights: Optional[str] = None # Copyright info
3736
+
source: Optional[str] = None # Source feed URL
3739
+
class FeedMetadata(BaseModel):
3740
+
"""Metadata extracted from a feed for auto-discovery."""
3742
+
title: Optional[str] = None
3743
+
author_name: Optional[str] = None
3744
+
author_email: Optional[EmailStr] = None
3745
+
author_uri: Optional[HttpUrl] = None
3746
+
link: Optional[HttpUrl] = None
3747
+
logo: Optional[HttpUrl] = None
3748
+
icon: Optional[HttpUrl] = None
3749
+
image_url: Optional[HttpUrl] = None
3750
+
description: Optional[str] = None
3752
+
def to_user_config(self, username: str, feed_url: HttpUrl) -> "UserConfig":
3753
+
"""Convert discovered metadata to UserConfig with fallbacks."""
3754
+
from .config import UserConfig
3756
+
return UserConfig(
3757
+
username=username,
3759
+
display_name=self.author_name or self.title,
3760
+
email=self.author_email,
3761
+
homepage=self.author_uri or self.link,
3762
+
icon=self.logo or self.icon or self.image_url,
3766
+
class DuplicateMap(BaseModel):
3767
+
"""Maps duplicate entry IDs to canonical entry IDs."""
3769
+
duplicates: dict[str, str] = {} # duplicate_id -> canonical_id
3770
+
comment: str = "Entry IDs that map to the same canonical content"
3772
+
def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None:
    """Record that *duplicate_id* is a duplicate of *canonical_id*.

    Any previous mapping for *duplicate_id* is overwritten.
    """
    self.duplicates.update({duplicate_id: canonical_id})
3776
+
def remove_duplicate(self, duplicate_id: str) -> bool:
    """Drop the mapping for *duplicate_id*.

    Returns True when a mapping existed and was removed, False otherwise.
    """
    existed = duplicate_id in self.duplicates
    if existed:
        del self.duplicates[duplicate_id]
    return existed
3780
+
def get_canonical(self, entry_id: str) -> str:
    """Return the canonical ID for *entry_id*.

    Entries that are not recorded as duplicates map to themselves.
    """
    try:
        return self.duplicates[entry_id]
    except KeyError:
        return entry_id
3784
+
def is_duplicate(self, entry_id: str) -> bool:
    """Report whether *entry_id* has been recorded as a duplicate."""
    # Mapping values are canonical IDs (str), never None, so a None
    # lookup result means the key is absent.
    return self.duplicates.get(entry_id) is not None
3788
+
def get_duplicates_for_canonical(self, canonical_id: str) -> list[str]:
3789
+
"""Get all duplicate IDs that map to a canonical ID."""
3792
+
for duplicate_id, canonical in self.duplicates.items()
3793
+
if canonical == canonical_id
3797
+
<file path="src/thicket/models/user.py">
3798
+
"""User metadata models for thicket."""
3800
+
from datetime import datetime
3801
+
from typing import Optional
3803
+
from pydantic import BaseModel, ConfigDict
3806
+
class UserMetadata(BaseModel):
3807
+
"""Metadata about a user stored in the Git repository."""
3809
+
model_config = ConfigDict(
3810
+
json_encoders={datetime: lambda v: v.isoformat()},
3811
+
str_strip_whitespace=True,
3815
+
display_name: Optional[str] = None
3816
+
email: Optional[str] = None
3817
+
homepage: Optional[str] = None
3818
+
icon: Optional[str] = None
3819
+
feeds: list[str] = []
3820
+
directory: str # Directory name in Git store
3822
+
last_updated: datetime
3823
+
entry_count: int = 0
3825
+
def update_timestamp(self) -> None:
    """Refresh last_updated with the current time.

    NOTE(review): uses a naive local datetime, matching the rest of the
    models in this package.
    """
    now = datetime.now()
    self.last_updated = now
3829
+
def increment_entry_count(self, count: int = 1) -> None:
    """Grow entry_count by *count* (default 1) and refresh last_updated."""
    self.entry_count = self.entry_count + count
    # Keep the freshness stamp in sync with the count change.
    self.update_timestamp()
3835
+
class GitStoreIndex(BaseModel):
3836
+
"""Index of all users and their directories in the Git store."""
3838
+
model_config = ConfigDict(
3839
+
json_encoders={datetime: lambda v: v.isoformat()}
3842
+
users: dict[str, UserMetadata] = {} # username -> UserMetadata
3844
+
last_updated: datetime
3845
+
total_entries: int = 0
3847
+
def add_user(self, user_metadata: UserMetadata) -> None:
    """Insert or replace a user in the index, keyed by username."""
    key = user_metadata.username
    self.users[key] = user_metadata
    # Record when the index itself last changed.
    self.last_updated = datetime.now()
3852
+
def remove_user(self, username: str) -> bool:
3853
+
"""Remove a user from the index. Returns True if user existed."""
3854
+
if username in self.users:
3855
+
del self.users[username]
3856
+
self.last_updated = datetime.now()
3860
+
def get_user(self, username: str) -> Optional[UserMetadata]:
    """Look up a user's metadata; returns None for unknown usernames."""
    return self.users.get(username, None)
3864
+
def update_entry_count(self, username: str, count: int) -> None:
3865
+
"""Update entry count for a user and total."""
3866
+
user = self.get_user(username)
3868
+
user.increment_entry_count(count)
3869
+
self.total_entries += count
3870
+
self.last_updated = datetime.now()
3872
+
def recalculate_totals(self) -> None:
    """Recompute total_entries from the per-user counts and stamp the index."""
    total = 0
    for user in self.users.values():
        total += user.entry_count
    self.total_entries = total
    self.last_updated = datetime.now()
3878
+
<file path="src/thicket/utils/__init__.py">
3879
+
"""Utility modules for thicket."""
3881
+
# This module will contain shared utilities
3882
+
# For now, it's empty but can be expanded with common functions
3885
+
<file path="src/thicket/__init__.py">
3886
+
"""Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories."""
3888
+
__version__ = "0.1.0"
3889
+
__author__ = "thicket"
3890
+
__email__ = "thicket@example.com"
3893
+
<file path="src/thicket/__main__.py">
3894
+
"""Entry point for running thicket as a module."""
3896
+
from .cli.main import app
3898
+
if __name__ == "__main__":
3902
+
<file path=".gitignore">
3903
+
# Byte-compiled / optimized / DLL files
3911
+
# Distribution / packaging
3925
+
share/python-wheels/
3932
+
# Usually these files are written by a python script from a template
3933
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
3939
+
pip-delete-this-directory.txt
3941
+
# Unit test / coverage reports
3964
+
db.sqlite3-journal
3973
+
# Sphinx documentation
3980
+
# Jupyter Notebook
3981
+
.ipynb_checkpoints
3988
+
# For a library or package, you might want to ignore these files since the code is
3989
+
# intended to run in multiple environments; otherwise, check them in:
3993
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
3994
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
3995
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
3996
+
# install all needed dependencies.
4000
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
4001
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
4002
+
# commonly ignored for libraries.
4006
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
4007
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
4008
+
# commonly ignored for libraries.
4009
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
4014
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
4015
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
4016
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
4023
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
4025
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
4026
+
# in the .venv directory. It is recommended not to include this directory in version control.
4029
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
4033
+
celerybeat-schedule
4036
+
# SageMath parsed files
4049
+
# Spyder project settings
4053
+
# Rope project settings
4056
+
# mkdocs documentation
4064
+
# Pyre type checker
4067
+
# pytype static type analyzer
4070
+
# Cython debug symbols
4074
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
4075
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
4076
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
4077
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
4081
+
# Abstra is an AI-powered process automation framework.
4082
+
# Ignore directories containing user credentials, local state, and settings.
4083
+
# Learn more at https://abstra.io/docs
4086
+
# Visual Studio Code
4087
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
4088
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
4089
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
4090
+
# you could uncomment the following to ignore the entire vscode folder
4096
+
# PyPI configuration file
4105
+
.streamlit/secrets.toml
4110
+
<file path="CLAUDE.md">
4111
+
My goal is to build a CLI tool called thicket in Python that maintains a Git repository within which Atom feeds can be persisted, including their contents.
4113
+
# Python Environment and Package Management
4115
+
This project uses `uv` for Python package management and virtual environment handling.
4117
+
## Running Commands
4119
+
ALWAYS use `uv run` to execute Python commands:
4121
+
- Run the CLI: `uv run -m thicket`
4122
+
- Run tests: `uv run pytest`
4123
+
- Type checking: `uv run mypy src/`
4124
+
- Linting: `uv run ruff check src/`
4125
+
- Format code: `uv run ruff format src/`
4126
+
- Compile check: `uv run python -m py_compile <file>`
4128
+
## Package Management
4130
+
- Add dependencies: `uv add <package>`
4131
+
- Add dev dependencies: `uv add --dev <package>`
4132
+
- Install dependencies: `uv sync`
4133
+
- Update dependencies: `uv lock --upgrade`
4135
+
# Project Structure
4137
+
The configuration file specifies:
4138
+
- the location of a git store
4139
+
- a list of usernames and target Atom/RSS feed(s) and optional metadata about the username such as their email, homepage, icon and display name
4140
+
- a cache directory to store temporary results such as feed downloads and their last modification date that speed up operations across runs of the tool
4142
+
The Git data store should:
4143
+
- have a subdirectory per user
4144
+
- within that directory, an entry per Atom entry indexed by the Atom id for that entry. The id should be sanitised consistently to be a safe filename. RSS feed should be normalized to Atom before storing it.
4145
+
- within each entry file, the metadata of the Atom feed converted into a JSON format that preserves as much metadata as possible.
4146
+
- have a JSON file in the Git repository that indexes the users, their associated directories within the Git repository, and any other metadata about that user from the config file
4147
+
The CLI should be modern and use cool progress bars and other niceties from ecosystem libraries.
4149
+
The intention behind the Git repository is that it can be queried by other websites in order to build a weblog structure of comments that link to other blogs.
4152
+
<file path="pyproject.toml">
4154
+
requires = ["hatchling"]
4155
+
build-backend = "hatchling.build"
4159
+
dynamic = ["version"]
4160
+
description = "A CLI tool for persisting Atom/RSS feeds in Git repositories"
4161
+
readme = "README.md"
4163
+
requires-python = ">=3.9"
4165
+
{name = "thicket", email = "thicket@example.com"},
4168
+
"Development Status :: 3 - Alpha",
4169
+
"Intended Audience :: Developers",
4170
+
"License :: OSI Approved :: MIT License",
4171
+
"Operating System :: OS Independent",
4172
+
"Programming Language :: Python :: 3",
4173
+
"Programming Language :: Python :: 3.9",
4174
+
"Programming Language :: Python :: 3.10",
4175
+
"Programming Language :: Python :: 3.11",
4176
+
"Programming Language :: Python :: 3.12",
4177
+
"Programming Language :: Python :: 3.13",
4178
+
"Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary",
4179
+
"Topic :: Software Development :: Version Control :: Git",
4180
+
"Topic :: Text Processing :: Markup :: XML",
4185
+
"GitPython>=3.1.40",
4186
+
"feedparser>=6.0.11",
4187
+
"pydantic>=2.11.0",
4188
+
"pydantic-settings>=2.10.0",
4190
+
"pendulum>=3.0.0",
4192
+
"platformdirs>=4.0.0",
4194
+
"email_validator",
4198
+
[project.optional-dependencies]
4201
+
"pytest-asyncio>=0.24.0",
4202
+
"pytest-cov>=6.0.0",
4206
+
"types-PyYAML>=6.0.0",
4210
+
Homepage = "https://github.com/example/thicket"
4211
+
Documentation = "https://github.com/example/thicket"
4212
+
Repository = "https://github.com/example/thicket"
4213
+
"Bug Tracker" = "https://github.com/example/thicket/issues"
4216
+
thicket = "thicket.cli.main:app"
4218
+
[tool.hatch.version]
4219
+
path = "src/thicket/__init__.py"
4221
+
[tool.hatch.build.targets.wheel]
4222
+
packages = ["src/thicket"]
4226
+
target-version = ['py39']
4227
+
include = '\.pyi?$'
4228
+
extend-exclude = '''
4243
+
target-version = "py39"
4248
+
"E", # pycodestyle errors
4249
+
"W", # pycodestyle warnings
4252
+
"B", # flake8-bugbear
4253
+
"C4", # flake8-comprehensions
4257
+
"E501", # line too long, handled by black
4258
+
"B008", # do not perform function calls in argument defaults
4259
+
"C901", # too complex
4262
+
[tool.ruff.lint.per-file-ignores]
4263
+
"__init__.py" = ["F401"]
4266
+
python_version = "3.9"
4267
+
check_untyped_defs = true
4268
+
disallow_any_generics = true
4269
+
disallow_incomplete_defs = true
4270
+
disallow_untyped_defs = true
4271
+
no_implicit_optional = true
4272
+
warn_redundant_casts = true
4273
+
warn_unused_ignores = true
4274
+
warn_return_any = true
4275
+
strict_optional = true
4277
+
[[tool.mypy.overrides]]
4283
+
ignore_missing_imports = true
4285
+
[tool.pytest.ini_options]
4286
+
testpaths = ["tests"]
4287
+
python_files = ["test_*.py"]
4288
+
python_classes = ["Test*"]
4289
+
python_functions = ["test_*"]
4292
+
"--strict-markers",
4293
+
"--strict-config",
4294
+
"--cov=src/thicket",
4295
+
"--cov-report=term-missing",
4296
+
"--cov-report=html",
4297
+
"--cov-report=xml",
4299
+
filterwarnings = [
4301
+
"ignore::UserWarning",
4302
+
"ignore::DeprecationWarning",
4305
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
4306
+
"integration: marks tests as integration tests",
4309
+
[tool.coverage.run]
4313
+
[tool.coverage.report]
4315
+
"pragma: no cover",
4318
+
"if settings.DEBUG",
4319
+
"raise AssertionError",
4320
+
"raise NotImplementedError",
4322
+
"if __name__ == .__main__.:",
4323
+
"class .*\\bProtocol\\):",
4324
+
"@(abc\\.)?abstractmethod",
4328
+
<file path="src/thicket/cli/commands/__init__.py">
4329
+
"""CLI commands for thicket."""
4331
+
# Import all commands to register them with the main app
4332
+
from . import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync
4334
+
__all__ = ["add", "duplicates", "generate", "index_cmd", "info_cmd", "init", "links_cmd", "list_cmd", "sync"]
4337
+
<file path="src/thicket/cli/commands/add.py">
4338
+
"""Add command for thicket."""
4341
+
from pathlib import Path
4342
+
from typing import Optional
4345
+
from pydantic import HttpUrl, ValidationError
4347
+
from ...core.feed_parser import FeedParser
4348
+
from ...core.git_store import GitStore
4349
+
from ..main import app
4350
+
from ..utils import (
4359
+
@app.command("add")
4361
+
subcommand: str = typer.Argument(..., help="Subcommand: 'user' or 'feed'"),
4362
+
username: str = typer.Argument(..., help="Username"),
4363
+
feed_url: Optional[str] = typer.Argument(None, help="Feed URL (required for 'user' command)"),
4364
+
email: Optional[str] = typer.Option(None, "--email", "-e", help="User email"),
4365
+
homepage: Optional[str] = typer.Option(None, "--homepage", "-h", help="User homepage"),
4366
+
icon: Optional[str] = typer.Option(None, "--icon", "-i", help="User icon URL"),
4367
+
display_name: Optional[str] = typer.Option(None, "--display-name", "-d", help="User display name"),
4368
+
config_file: Optional[Path] = typer.Option(
4369
+
Path("thicket.yaml"), "--config", help="Configuration file path"
4371
+
auto_discover: bool = typer.Option(
4372
+
True, "--auto-discover/--no-auto-discover", help="Auto-discover user metadata from feed"
4375
+
"""Add a user or feed to thicket."""
4377
+
if subcommand == "user":
4378
+
add_user(username, feed_url, email, homepage, icon, display_name, config_file, auto_discover)
4379
+
elif subcommand == "feed":
4380
+
add_feed(username, feed_url, config_file)
4382
+
print_error(f"Unknown subcommand: {subcommand}")
4383
+
print_error("Use 'user' or 'feed'")
4384
+
raise typer.Exit(1)
4389
+
feed_url: Optional[str],
4390
+
email: Optional[str],
4391
+
homepage: Optional[str],
4392
+
icon: Optional[str],
4393
+
display_name: Optional[str],
4394
+
config_file: Path,
4395
+
auto_discover: bool,
4397
+
"""Add a new user with feed."""
4400
+
print_error("Feed URL is required when adding a user")
4401
+
raise typer.Exit(1)
4403
+
# Validate feed URL
4405
+
validated_feed_url = HttpUrl(feed_url)
4406
+
except ValidationError:
4407
+
print_error(f"Invalid feed URL: {feed_url}")
4408
+
raise typer.Exit(1) from None
4410
+
# Load configuration
4411
+
config = load_config(config_file)
4413
+
# Initialize Git store
4414
+
git_store = GitStore(config.git_store)
4416
+
# Check if user already exists
4417
+
existing_user = git_store.get_user(username)
4419
+
print_error(f"User '{username}' already exists")
4420
+
print_error("Use 'thicket add feed' to add additional feeds")
4421
+
raise typer.Exit(1)
4423
+
# Auto-discover metadata if enabled
4424
+
discovered_metadata = None
4426
+
discovered_metadata = asyncio.run(discover_feed_metadata(validated_feed_url))
4428
+
# Prepare user data with manual overrides taking precedence
4429
+
user_display_name = display_name or (discovered_metadata.author_name or discovered_metadata.title if discovered_metadata else None)
4430
+
user_email = email or (discovered_metadata.author_email if discovered_metadata else None)
4431
+
user_homepage = homepage or (str(discovered_metadata.author_uri or discovered_metadata.link) if discovered_metadata else None)
4432
+
user_icon = icon or (str(discovered_metadata.logo or discovered_metadata.icon or discovered_metadata.image_url) if discovered_metadata else None)
4434
+
# Add user to Git store
4435
+
git_store.add_user(
4436
+
username=username,
4437
+
display_name=user_display_name,
4439
+
homepage=user_homepage,
4441
+
feeds=[str(validated_feed_url)],
4445
+
git_store.commit_changes(f"Add user: {username}")
4447
+
print_success(f"Added user '{username}' with feed: {feed_url}")
4449
+
if discovered_metadata and auto_discover:
4450
+
print_info("Auto-discovered metadata:")
4451
+
if user_display_name:
4452
+
print_info(f" Display name: {user_display_name}")
4454
+
print_info(f" Email: {user_email}")
4456
+
print_info(f" Homepage: {user_homepage}")
4458
+
print_info(f" Icon: {user_icon}")
4461
+
def add_feed(username: str, feed_url: Optional[str], config_file: Path) -> None:
4462
+
"""Add a feed to an existing user."""
4465
+
print_error("Feed URL is required")
4466
+
raise typer.Exit(1)
4468
+
# Validate feed URL
4470
+
validated_feed_url = HttpUrl(feed_url)
4471
+
except ValidationError:
4472
+
print_error(f"Invalid feed URL: {feed_url}")
4473
+
raise typer.Exit(1) from None
4475
+
# Load configuration
4476
+
config = load_config(config_file)
4478
+
# Initialize Git store
4479
+
git_store = GitStore(config.git_store)
4481
+
# Check if user exists
4482
+
user = git_store.get_user(username)
4484
+
print_error(f"User '{username}' not found")
4485
+
print_error("Use 'thicket add user' to add a new user")
4486
+
raise typer.Exit(1)
4488
+
# Check if feed already exists
4489
+
if str(validated_feed_url) in user.feeds:
4490
+
print_error(f"Feed already exists for user '{username}': {feed_url}")
4491
+
raise typer.Exit(1)
4493
+
# Add feed to user
4494
+
updated_feeds = user.feeds + [str(validated_feed_url)]
4495
+
if git_store.update_user(username, feeds=updated_feeds):
4496
+
git_store.commit_changes(f"Add feed to user {username}: {feed_url}")
4497
+
print_success(f"Added feed to user '{username}': {feed_url}")
4499
+
print_error(f"Failed to add feed to user '{username}'")
4500
+
raise typer.Exit(1)
4503
+
async def discover_feed_metadata(feed_url: HttpUrl):
4504
+
"""Discover metadata from a feed URL."""
4506
+
with create_progress() as progress:
4507
+
task = progress.add_task("Discovering feed metadata...", total=None)
4509
+
parser = FeedParser()
4510
+
content = await parser.fetch_feed(feed_url)
4511
+
metadata, _ = parser.parse_feed(content, feed_url)
4513
+
progress.update(task, completed=True)
4516
+
except Exception as e:
4517
+
print_error(f"Failed to discover feed metadata: {e}")
4521
+
<file path="src/thicket/cli/commands/duplicates.py">
4522
+
"""Duplicates command for thicket."""
4524
+
from pathlib import Path
4525
+
from typing import Optional
4528
+
from rich.table import Table
4530
+
from ...core.git_store import GitStore
4531
+
from ..main import app
4532
+
from ..utils import (
4542
+
@app.command("duplicates")
4543
+
def duplicates_command(
4544
+
action: str = typer.Argument(..., help="Action: 'list', 'add', 'remove'"),
4545
+
duplicate_id: Optional[str] = typer.Argument(None, help="Duplicate entry ID"),
4546
+
canonical_id: Optional[str] = typer.Argument(None, help="Canonical entry ID"),
4547
+
config_file: Optional[Path] = typer.Option(
4548
+
Path("thicket.yaml"), "--config", help="Configuration file path"
4551
+
"""Manage duplicate entry mappings."""
4553
+
# Load configuration
4554
+
config = load_config(config_file)
4556
+
# Initialize Git store
4557
+
git_store = GitStore(config.git_store)
4559
+
if action == "list":
4560
+
list_duplicates(git_store)
4561
+
elif action == "add":
4562
+
add_duplicate(git_store, duplicate_id, canonical_id)
4563
+
elif action == "remove":
4564
+
remove_duplicate(git_store, duplicate_id)
4566
+
print_error(f"Unknown action: {action}")
4567
+
print_error("Use 'list', 'add', or 'remove'")
4568
+
raise typer.Exit(1)
4571
+
def list_duplicates(git_store: GitStore) -> None:
4572
+
"""List all duplicate mappings."""
4573
+
duplicates = git_store.get_duplicates()
4575
+
if not duplicates.duplicates:
4576
+
if get_tsv_mode():
4577
+
print("No duplicate mappings found")
4579
+
print_info("No duplicate mappings found")
4582
+
if get_tsv_mode():
4583
+
print("Duplicate ID\tCanonical ID")
4584
+
for duplicate_id, canonical_id in duplicates.duplicates.items():
4585
+
print(f"{duplicate_id}\t{canonical_id}")
4586
+
print(f"Total duplicates: {len(duplicates.duplicates)}")
4588
+
table = Table(title="Duplicate Entry Mappings")
4589
+
table.add_column("Duplicate ID", style="red")
4590
+
table.add_column("Canonical ID", style="green")
4592
+
for duplicate_id, canonical_id in duplicates.duplicates.items():
4593
+
table.add_row(duplicate_id, canonical_id)
4595
+
console.print(table)
4596
+
print_info(f"Total duplicates: {len(duplicates.duplicates)}")
4599
+
def add_duplicate(git_store: GitStore, duplicate_id: Optional[str], canonical_id: Optional[str]) -> None:
    """Add a duplicate -> canonical mapping and commit it to the store.

    Exits with code 1 on missing arguments, an existing mapping, or a
    self-referential mapping.
    """
    # Both IDs are mandatory for this action.
    for label, value in (("Duplicate ID", duplicate_id), ("Canonical ID", canonical_id)):
        if not value:
            print_error(f"{label} is required")
            raise typer.Exit(1)

    # Refuse to silently overwrite an existing mapping.
    duplicates = git_store.get_duplicates()
    if duplicates.is_duplicate(duplicate_id):
        existing_canonical = duplicates.get_canonical(duplicate_id)
        print_error(f"Duplicate ID already mapped to: {existing_canonical}")
        print_error("Use 'remove' first to change the mapping")
        raise typer.Exit(1)

    # A mapping from an ID to itself is meaningless.
    if duplicate_id == canonical_id:
        print_error("Duplicate ID cannot be the same as canonical ID")
        raise typer.Exit(1)

    git_store.add_duplicate(duplicate_id, canonical_id)
    git_store.commit_changes(f"Add duplicate mapping: {duplicate_id} -> {canonical_id}")
    print_success(f"Added duplicate mapping: {duplicate_id} -> {canonical_id}")
def remove_duplicate(git_store: GitStore, duplicate_id: Optional[str]) -> None:
    """Remove an existing duplicate mapping and commit the change.

    Exits with code 1 when the ID is missing, unmapped, or removal fails.
    """
    if not duplicate_id:
        print_error("Duplicate ID is required")
        raise typer.Exit(1)

    # The mapping must exist before we can remove it.
    duplicates = git_store.get_duplicates()
    if not duplicates.is_duplicate(duplicate_id):
        print_error(f"No duplicate mapping found for: {duplicate_id}")
        raise typer.Exit(1)

    # Remember the canonical target for the commit/status messages.
    canonical_id = duplicates.get_canonical(duplicate_id)

    if not git_store.remove_duplicate(duplicate_id):
        print_error(f"Failed to remove duplicate mapping: {duplicate_id}")
        raise typer.Exit(1)

    git_store.commit_changes(f"Remove duplicate mapping: {duplicate_id} -> {canonical_id}")
    print_success(f"Removed duplicate mapping: {duplicate_id} -> {canonical_id}")
4655
+
<file path="src/thicket/cli/commands/sync.py">
4656
+
"""Sync command for thicket."""
4659
+
from pathlib import Path
4660
+
from typing import Optional
4663
+
from rich.progress import track
4665
+
from ...core.feed_parser import FeedParser
4666
+
from ...core.git_store import GitStore
4667
+
from ..main import app
4668
+
from ..utils import (
4678
+
all_users: bool = typer.Option(
4679
+
False, "--all", "-a", help="Sync all users and feeds"
4681
+
user: Optional[str] = typer.Option(
4682
+
None, "--user", "-u", help="Sync specific user only"
4684
+
config_file: Optional[Path] = typer.Option(
4685
+
Path("thicket.yaml"), "--config", help="Configuration file path"
4687
+
dry_run: bool = typer.Option(
4688
+
False, "--dry-run", help="Show what would be synced without making changes"
4691
+
"""Sync feeds and store entries in Git repository."""
4693
+
# Load configuration
4694
+
config = load_config(config_file)
4696
+
# Initialize Git store
4697
+
git_store = GitStore(config.git_store)
4699
+
# Determine which users to sync from git repository
4700
+
users_to_sync = []
4702
+
index = git_store._load_index()
4703
+
users_to_sync = list(index.users.values())
4705
+
user_metadata = git_store.get_user(user)
4706
+
if not user_metadata:
4707
+
print_error(f"User '{user}' not found in git repository")
4708
+
raise typer.Exit(1)
4709
+
users_to_sync = [user_metadata]
4711
+
print_error("Specify --all to sync all users or --user to sync a specific user")
4712
+
raise typer.Exit(1)
4714
+
if not users_to_sync:
4715
+
print_info("No users configured to sync")
4719
+
total_new_entries = 0
4720
+
total_updated_entries = 0
4722
+
for user_metadata in users_to_sync:
4723
+
print_info(f"Syncing user: {user_metadata.username}")
4725
+
user_new_entries = 0
4726
+
user_updated_entries = 0
4728
+
# Sync each feed for the user
4729
+
for feed_url in track(user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"):
4731
+
new_entries, updated_entries = asyncio.run(
4732
+
sync_feed(git_store, user_metadata.username, feed_url, dry_run)
4734
+
user_new_entries += new_entries
4735
+
user_updated_entries += updated_entries
4737
+
except Exception as e:
4738
+
print_error(f"Failed to sync feed {feed_url}: {e}")
4741
+
print_info(f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated")
4742
+
total_new_entries += user_new_entries
4743
+
total_updated_entries += user_updated_entries
4745
+
# Commit changes if not dry run
4746
+
if not dry_run and (total_new_entries > 0 or total_updated_entries > 0):
4747
+
commit_message = f"Sync feeds: {total_new_entries} new entries, {total_updated_entries} updated"
4748
+
git_store.commit_changes(commit_message)
4749
+
print_success(f"Committed changes: {commit_message}")
4753
+
print_info(f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated")
4755
+
print_success(f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated")
4758
+
async def sync_feed(git_store: GitStore, username: str, feed_url, dry_run: bool) -> tuple[int, int]:
4759
+
"""Sync a single feed for a user."""
4761
+
parser = FeedParser()
4764
+
# Fetch and parse feed
4765
+
content = await parser.fetch_feed(feed_url)
4766
+
metadata, entries = parser.parse_feed(content, feed_url)
4769
+
updated_entries = 0
4771
+
# Process each entry
4772
+
for entry in entries:
4774
+
# Check if entry already exists
4775
+
existing_entry = git_store.get_entry(username, entry.id)
4777
+
if existing_entry:
4778
+
# Check if entry has been updated
4779
+
if existing_entry.updated != entry.updated:
4781
+
git_store.store_entry(username, entry)
4782
+
updated_entries += 1
4786
+
git_store.store_entry(username, entry)
4789
+
except Exception as e:
4790
+
print_error(f"Failed to process entry {entry.id}: {e}")
4793
+
return new_entries, updated_entries
4795
+
except Exception as e:
4796
+
print_error(f"Failed to sync feed {feed_url}: {e}")
4800
+
<file path="src/thicket/models/config.py">
4801
+
"""Configuration models for thicket."""
4803
+
from pathlib import Path
4804
+
from typing import Optional
4806
+
from pydantic import BaseModel, EmailStr, HttpUrl
4807
+
from pydantic_settings import BaseSettings, SettingsConfigDict
4810
+
class UserConfig(BaseModel):
4811
+
"""Configuration for a single user and their feeds."""
4814
+
feeds: list[HttpUrl]
4815
+
email: Optional[EmailStr] = None
4816
+
homepage: Optional[HttpUrl] = None
4817
+
icon: Optional[HttpUrl] = None
4818
+
display_name: Optional[str] = None
4821
+
class ThicketConfig(BaseSettings):
4822
+
"""Main configuration for thicket."""
4824
+
model_config = SettingsConfigDict(
4825
+
env_prefix="THICKET_",
4827
+
yaml_file="thicket.yaml",
4828
+
case_sensitive=False,
4833
+
users: list[UserConfig] = []
4836
+
<file path="src/thicket/cli/commands/links_cmd.py">
4837
+
"""CLI command for extracting and categorizing all outbound links from blog entries."""
4841
+
from pathlib import Path
4842
+
from typing import Dict, List, Optional, Set
4843
+
from urllib.parse import urljoin, urlparse
4846
+
from rich.console import Console
4847
+
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
4848
+
from rich.table import Table
4850
+
from ...core.git_store import GitStore
4851
+
from ..main import app
4852
+
from ..utils import load_config, get_tsv_mode
4854
+
console = Console()
class LinkData:
    """A single outbound link discovered in one user's blog entry."""

    def __init__(self, url: str, entry_id: str, username: str):
        # Resolved absolute URL of the link target.
        self.url = url
        # Atom ID of the entry the link was found in.
        self.entry_id = entry_id
        # Owner of the entry containing the link.
        self.username = username

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return dict(url=self.url, entry_id=self.entry_id, username=self.username)

    @classmethod
    def from_dict(cls, data: dict) -> "LinkData":
        """Rebuild a LinkData from its serialized dictionary form."""
        return cls(data["url"], data["entry_id"], data["username"])
class LinkCategorizer:
    """Classifies a URL as 'internal', 'user', or 'unknown' by its domain."""

    def __init__(self, user_domains: Dict[str, Set[str]]):
        # username -> set of domains belonging to that user.
        self.user_domains = user_domains
        # Inverted index: domain -> owning username (later users win on collision,
        # matching insertion order of the input mapping).
        self.domain_to_user = {
            domain: username
            for username, domains in user_domains.items()
            for domain in domains
        }

    def categorize_url(self, url: str, source_username: str) -> tuple[str, Optional[str]]:
        """Categorize *url* relative to *source_username*.

        Returns ``(category, target_username)`` where category is one of
        'internal' (same user's domain), 'user' (another tracked user's
        domain), or 'unknown' (anything else or unparseable).
        """
        try:
            host = urlparse(url).netloc.lower()
            # Link back to the author's own domain.
            if host in self.user_domains.get(source_username, set()):
                return "internal", source_username
            # Link to a domain owned by another tracked user.
            owner = self.domain_to_user.get(host)
            if owner is not None:
                return "user", owner
            return "unknown", None
        except Exception:
            # Malformed URLs are treated as unknown rather than fatal.
            return "unknown", None
class LinkExtractor:
4919
+
"""Extracts and resolves links from blog entries."""
4921
+
def __init__(self):
4922
+
# Pattern for extracting links from HTML
4923
+
self.link_pattern = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL)
4924
+
self.url_pattern = re.compile(r'https?://[^\s<>"]+')
4926
+
def extract_links_from_html(self, html_content: str, base_url: str) -> List[tuple[str, str]]:
4927
+
"""Extract all links from HTML content and resolve them against base URL."""
4930
+
# Extract links from <a> tags
4931
+
for match in self.link_pattern.finditer(html_content):
4932
+
url = match.group(1)
4933
+
text = re.sub(r'<[^>]+>', '', match.group(2)).strip() # Remove HTML tags from link text
4935
+
# Resolve relative URLs against base URL
4936
+
resolved_url = urljoin(base_url, url)
4937
+
links.append((resolved_url, text))
4942
+
def extract_links_from_entry(self, entry, username: str, base_url: str) -> List[LinkData]:
4943
+
"""Extract all links from a blog entry."""
4946
+
# Combine all text content for analysis
4947
+
content_to_search = []
4949
+
content_to_search.append(entry.content)
4951
+
content_to_search.append(entry.summary)
4953
+
for content in content_to_search:
4954
+
extracted_links = self.extract_links_from_html(content, base_url)
4956
+
for url, link_text in extracted_links:
4958
+
if not url or url.startswith('#'):
4961
+
link_data = LinkData(
4963
+
entry_id=entry.id,
4967
+
links.append(link_data)
4974
+
config_file: Optional[Path] = typer.Option(
4975
+
Path("thicket.yaml"),
4978
+
help="Path to configuration file",
4980
+
output_file: Optional[Path] = typer.Option(
4984
+
help="Path to output unified links file (default: links.json in git store)",
4986
+
verbose: bool = typer.Option(
4990
+
help="Show detailed progress information",
4993
+
"""Extract and categorize all outbound links from blog entries.
4995
+
This command analyzes all blog entries to extract outbound links,
4996
+
resolve them properly with respect to the feed's base URL, and
4997
+
categorize them as internal, user, or unknown links.
4999
+
Creates a unified links.json file containing all link data.
5002
+
# Load configuration
5003
+
config = load_config(config_file)
5005
+
# Initialize Git store
5006
+
git_store = GitStore(config.git_store)
5008
+
# Build user domain mapping
5010
+
console.print("Building user domain mapping...")
5012
+
index = git_store._load_index()
5015
+
for username, user_metadata in index.users.items():
5018
+
# Add domains from feeds
5019
+
for feed_url in user_metadata.feeds:
5020
+
domain = urlparse(feed_url).netloc.lower()
5022
+
domains.add(domain)
5024
+
# Add domain from homepage
5025
+
if user_metadata.homepage:
5026
+
domain = urlparse(str(user_metadata.homepage)).netloc.lower()
5028
+
domains.add(domain)
5030
+
user_domains[username] = domains
5033
+
console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")
5035
+
# Initialize components
5036
+
link_extractor = LinkExtractor()
5037
+
categorizer = LinkCategorizer(user_domains)
5040
+
users = list(index.users.keys())
5043
+
console.print("[yellow]No users found in Git store[/yellow]")
5044
+
raise typer.Exit(0)
5046
+
# Process all entries
5048
+
link_categories = {"internal": [], "user": [], "unknown": []}
5049
+
link_dict = {} # Dictionary with link URL as key, maps to list of atom IDs
5050
+
reverse_dict = {} # Dictionary with atom ID as key, maps to list of URLs
5054
+
TextColumn("[progress.description]{task.description}"),
5056
+
TaskProgressColumn(),
5060
+
# Count total entries first
5061
+
counting_task = progress.add_task("Counting entries...", total=len(users))
5064
+
for username in users:
5065
+
entries = git_store.list_entries(username)
5066
+
total_entries += len(entries)
5067
+
progress.advance(counting_task)
5069
+
progress.remove_task(counting_task)
5072
+
processing_task = progress.add_task(
5073
+
f"Processing {total_entries} entries...",
5074
+
total=total_entries
5077
+
for username in users:
5078
+
entries = git_store.list_entries(username)
5079
+
user_metadata = index.users[username]
5081
+
# Get base URL for this user (use first feed URL)
5082
+
base_url = str(user_metadata.feeds[0]) if user_metadata.feeds else "https://example.com"
5084
+
for entry in entries:
5085
+
# Extract links from this entry
5086
+
entry_links = link_extractor.extract_links_from_entry(entry, username, base_url)
5088
+
# Track unique links per entry
5089
+
entry_urls_seen = set()
5091
+
# Categorize each link
5092
+
for link_data in entry_links:
5093
+
# Skip if we've already seen this URL in this entry
5094
+
if link_data.url in entry_urls_seen:
5096
+
entry_urls_seen.add(link_data.url)
5098
+
category, target_username = categorizer.categorize_url(link_data.url, username)
5100
+
# Add to link dictionary (URL as key, maps to list of atom IDs)
5101
+
if link_data.url not in link_dict:
5102
+
link_dict[link_data.url] = []
5103
+
if link_data.entry_id not in link_dict[link_data.url]:
5104
+
link_dict[link_data.url].append(link_data.entry_id)
5106
+
# Also add to reverse mapping (atom ID -> list of URLs)
5107
+
if link_data.entry_id not in reverse_dict:
5108
+
reverse_dict[link_data.entry_id] = []
5109
+
if link_data.url not in reverse_dict[link_data.entry_id]:
5110
+
reverse_dict[link_data.entry_id].append(link_data.url)
5112
+
# Add category info to link data for categories tracking
5113
+
link_info = link_data.to_dict()
5114
+
link_info["category"] = category
5115
+
link_info["target_username"] = target_username
5117
+
all_links.append(link_info)
5118
+
link_categories[category].append(link_info)
5120
+
progress.advance(processing_task)
5122
+
if verbose and entry_links:
5123
+
console.print(f" Found {len(entry_links)} links in {username}:{entry.title[:50]}...")
5125
+
# Determine output path
5127
+
output_path = output_file
5129
+
output_path = config.git_store / "links.json"
5131
+
# Save all extracted links (not just filtered ones)
5133
+
console.print("Preparing output data...")
5135
+
# Build a set of all URLs that correspond to posts in the git database
5136
+
registered_urls = set()
5138
+
# Get all entries from all users and build URL mappings
5139
+
for username in users:
5140
+
entries = git_store.list_entries(username)
5141
+
user_metadata = index.users[username]
5143
+
for entry in entries:
5144
+
# Try to match entry URLs with extracted links
5145
+
if hasattr(entry, 'link') and entry.link:
5146
+
registered_urls.add(str(entry.link))
5148
+
# Also check entry alternate links if they exist
5149
+
if hasattr(entry, 'links') and entry.links:
5150
+
for link in entry.links:
5151
+
if hasattr(link, 'href') and link.href:
5152
+
registered_urls.add(str(link.href))
5154
+
# Build unified structure with metadata
5155
+
unified_links = {}
5156
+
reverse_mapping = {}
5158
+
for url, entry_ids in link_dict.items():
5159
+
unified_links[url] = {
5160
+
"referencing_entries": entry_ids
5163
+
# Find target username if this is a tracked post
5164
+
if url in registered_urls:
5165
+
for username in users:
5166
+
user_domains_set = {domain for domain in user_domains.get(username, [])}
5167
+
if any(domain in url for domain in user_domains_set):
5168
+
unified_links[url]["target_username"] = username
5171
+
# Build reverse mapping
5172
+
for entry_id in entry_ids:
5173
+
if entry_id not in reverse_mapping:
5174
+
reverse_mapping[entry_id] = []
5175
+
if url not in reverse_mapping[entry_id]:
5176
+
reverse_mapping[entry_id].append(url)
5178
+
# Create unified output data
5180
+
"links": unified_links,
5181
+
"reverse_mapping": reverse_mapping,
5182
+
"user_domains": {k: list(v) for k, v in user_domains.items()}
5186
+
console.print(f"Found {len(registered_urls)} registered post URLs")
5187
+
console.print(f"Found {len(link_dict)} total links, {sum(1 for link in unified_links.values() if 'target_username' in link)} tracked posts")
5189
+
# Save unified data
5190
+
with open(output_path, "w") as f:
5191
+
json.dump(output_data, f, indent=2, default=str)
5194
+
if not get_tsv_mode():
5195
+
console.print("\n[green]โ Links extraction completed successfully[/green]")
5197
+
# Create summary table or TSV output
5198
+
if get_tsv_mode():
5199
+
print("Category\tCount\tDescription")
5200
+
print(f"Internal\t{len(link_categories['internal'])}\tLinks to same user's domain")
5201
+
print(f"User\t{len(link_categories['user'])}\tLinks to other tracked users")
5202
+
print(f"Unknown\t{len(link_categories['unknown'])}\tLinks to external sites")
5203
+
print(f"Total Extracted\t{len(all_links)}\tAll extracted links")
5204
+
print(f"Saved to Output\t{len(output_data['links'])}\tLinks saved to output file")
5205
+
print(f"Cross-references\t{sum(1 for link in unified_links.values() if 'target_username' in link)}\tLinks to registered posts only")
5207
+
table = Table(title="Links Summary")
5208
+
table.add_column("Category", style="cyan")
5209
+
table.add_column("Count", style="green")
5210
+
table.add_column("Description", style="white")
5212
+
table.add_row("Internal", str(len(link_categories["internal"])), "Links to same user's domain")
5213
+
table.add_row("User", str(len(link_categories["user"])), "Links to other tracked users")
5214
+
table.add_row("Unknown", str(len(link_categories["unknown"])), "Links to external sites")
5215
+
table.add_row("Total Extracted", str(len(all_links)), "All extracted links")
5216
+
table.add_row("Saved to Output", str(len(output_data['links'])), "Links saved to output file")
5217
+
table.add_row("Cross-references", str(sum(1 for link in unified_links.values() if 'target_username' in link)), "Links to registered posts only")
5219
+
console.print(table)
5221
+
# Show user links if verbose
5222
+
if verbose and link_categories["user"]:
5223
+
if get_tsv_mode():
5224
+
print("User Link Source\tUser Link Target\tLink Count")
5225
+
user_link_counts = {}
5227
+
for link in link_categories["user"]:
5228
+
key = f"{link['username']} -> {link['target_username']}"
5229
+
user_link_counts[key] = user_link_counts.get(key, 0) + 1
5231
+
for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
5232
+
source, target = link_pair.split(" -> ")
5233
+
print(f"{source}\t{target}\t{count}")
5235
+
console.print("\n[bold]User-to-user links:[/bold]")
5236
+
user_link_counts = {}
5238
+
for link in link_categories["user"]:
5239
+
key = f"{link['username']} -> {link['target_username']}"
5240
+
user_link_counts[key] = user_link_counts.get(key, 0) + 1
5242
+
for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
5243
+
console.print(f" {link_pair}: {count} links")
5245
+
if not get_tsv_mode():
5246
+
console.print(f"\nUnified links data saved to: {output_path}")
5248
+
except Exception as e:
5249
+
console.print(f"[red]Error extracting links: {e}[/red]")
5251
+
console.print_exception()
5252
+
raise typer.Exit(1)
5255
+
<file path="src/thicket/cli/commands/list_cmd.py">
5256
+
"""List command for thicket."""
5259
+
from pathlib import Path
5260
+
from typing import Optional
5263
+
from rich.table import Table
5265
+
from ...core.git_store import GitStore
5266
+
from ..main import app
5267
+
from ..utils import (
5271
+
print_feeds_table,
5272
+
print_feeds_table_from_git,
5274
+
print_users_table,
5275
+
print_users_table_from_git,
5276
+
print_entries_tsv,
5281
+
@app.command("list")
5283
+
what: str = typer.Argument(..., help="What to list: 'users', 'feeds', 'entries'"),
5284
+
user: Optional[str] = typer.Option(
5285
+
None, "--user", "-u", help="Filter by specific user"
5287
+
limit: Optional[int] = typer.Option(
5288
+
None, "--limit", "-l", help="Limit number of results"
5290
+
config_file: Optional[Path] = typer.Option(
5291
+
Path("thicket.yaml"), "--config", help="Configuration file path"
5294
+
"""List users, feeds, or entries."""
5296
+
# Load configuration
5297
+
config = load_config(config_file)
5299
+
# Initialize Git store
5300
+
git_store = GitStore(config.git_store)
5302
+
if what == "users":
5303
+
list_users(git_store)
5304
+
elif what == "feeds":
5305
+
list_feeds(git_store, user)
5306
+
elif what == "entries":
5307
+
list_entries(git_store, user, limit)
5309
+
print_error(f"Unknown list type: {what}")
5310
+
print_error("Use 'users', 'feeds', or 'entries'")
5311
+
raise typer.Exit(1)
5314
+
def list_users(git_store: GitStore) -> None:
    """Print all users known to the Git store, or a note when there are none."""
    users = list(git_store._load_index().users.values())
    if not users:
        print_info("No users configured")
        return
    print_users_table_from_git(users)
5326
+
def list_feeds(git_store: GitStore, username: Optional[str] = None) -> None:
5327
+
"""List feeds, optionally filtered by user."""
5329
+
user = git_store.get_user(username)
5331
+
print_error(f"User '{username}' not found")
5332
+
raise typer.Exit(1)
5334
+
if not user.feeds:
5335
+
print_info(f"No feeds configured for user '{username}'")
5338
+
print_feeds_table_from_git(git_store, username)
5341
+
def list_entries(git_store: GitStore, username: Optional[str] = None, limit: Optional[int] = None) -> None:
5342
+
"""List entries, optionally filtered by user."""
5345
+
# List entries for specific user
5346
+
user = git_store.get_user(username)
5348
+
print_error(f"User '{username}' not found")
5349
+
raise typer.Exit(1)
5351
+
entries = git_store.list_entries(username, limit)
5353
+
print_info(f"No entries found for user '{username}'")
5356
+
print_entries_table([entries], [username])
5359
+
# List entries for all users
5361
+
all_usernames = []
5363
+
index = git_store._load_index()
5364
+
for user in index.users.values():
5365
+
entries = git_store.list_entries(user.username, limit)
5367
+
all_entries.append(entries)
5368
+
all_usernames.append(user.username)
5370
+
if not all_entries:
5371
+
print_info("No entries found")
5374
+
print_entries_table(all_entries, all_usernames)
5377
+
def _clean_html_content(content: Optional[str]) -> str:
5378
+
"""Clean HTML content for display in table."""
5382
+
# Remove HTML tags
5383
+
clean_text = re.sub(r'<[^>]+>', ' ', content)
5384
+
# Replace multiple whitespace with single space
5385
+
clean_text = re.sub(r'\s+', ' ', clean_text)
5386
+
# Strip and limit length
5387
+
clean_text = clean_text.strip()
5388
+
if len(clean_text) > 100:
5389
+
clean_text = clean_text[:97] + "..."
5394
+
def print_entries_table(entries_by_user: list[list], usernames: list[str]) -> None:
5395
+
"""Print a table of entries."""
5396
+
if get_tsv_mode():
5397
+
print_entries_tsv(entries_by_user, usernames)
5400
+
table = Table(title="Feed Entries")
5401
+
table.add_column("User", style="cyan", no_wrap=True)
5402
+
table.add_column("Title", style="bold")
5403
+
table.add_column("Updated", style="blue")
5404
+
table.add_column("URL", style="green")
5406
+
# Combine all entries with usernames
5408
+
for entries, username in zip(entries_by_user, usernames):
5409
+
for entry in entries:
5410
+
all_entries.append((username, entry))
5412
+
# Sort by updated time (newest first)
5413
+
all_entries.sort(key=lambda x: x[1].updated, reverse=True)
5415
+
for username, entry in all_entries:
5416
+
# Format updated time
5417
+
updated_str = entry.updated.strftime("%Y-%m-%d %H:%M")
5419
+
# Truncate title if too long
5420
+
title = entry.title
5421
+
if len(title) > 50:
5422
+
title = title[:47] + "..."
5431
+
console.print(table)
5434
+
<file path="src/thicket/cli/main.py">
5435
+
"""Main CLI application using Typer."""
5438
+
from rich.console import Console
5440
+
from .. import __version__
5442
+
app = typer.Typer(
5444
+
help="A CLI tool for persisting Atom/RSS feeds in Git repositories",
5445
+
no_args_is_help=True,
5446
+
rich_markup_mode="rich",
5449
+
console = Console()
5451
+
# Global state for TSV output mode
5455
+
def version_callback(value: bool) -> None:
    """Print the package version and exit when the --version flag is set."""
    if not value:
        return
    console.print(f"thicket version {__version__}")
    raise typer.Exit()
5464
+
version: bool = typer.Option(
5468
+
help="Show the version and exit",
5469
+
callback=version_callback,
5472
+
tsv: bool = typer.Option(
5475
+
help="Output in tab-separated values format without truncation",
5478
+
"""Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories."""
5483
+
# Import commands to register them
5484
+
from .commands import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync
5486
+
if __name__ == "__main__":
5490
+
<file path="src/thicket/core/git_store.py">
5491
+
"""Git repository operations for thicket."""
5494
+
from datetime import datetime
5495
+
from pathlib import Path
5496
+
from typing import Optional
5499
+
from git import Repo
5501
+
from ..models import AtomEntry, DuplicateMap, GitStoreIndex, UserMetadata
5505
+
"""Manages the Git repository for storing feed entries."""
5507
+
def __init__(self, repo_path: Path):
5508
+
"""Initialize the Git store."""
5509
+
self.repo_path = repo_path
5510
+
self.repo: Optional[Repo] = None
5511
+
self._ensure_repo()
5513
+
def _ensure_repo(self) -> None:
5514
+
"""Ensure the Git repository exists and is initialized."""
5515
+
if not self.repo_path.exists():
5516
+
self.repo_path.mkdir(parents=True, exist_ok=True)
5519
+
self.repo = Repo(self.repo_path)
5520
+
except git.InvalidGitRepositoryError:
5521
+
# Initialize new repository
5522
+
self.repo = Repo.init(self.repo_path)
5523
+
self._create_initial_structure()
5525
+
def _create_initial_structure(self) -> None:
5526
+
"""Create initial Git store structure."""
5527
+
# Create index.json
5528
+
index = GitStoreIndex(
5529
+
created=datetime.now(),
5530
+
last_updated=datetime.now(),
5532
+
self._save_index(index)
5534
+
# Create duplicates.json
5535
+
duplicates = DuplicateMap()
5536
+
self._save_duplicates(duplicates)
5538
+
# Create initial commit
5539
+
self.repo.index.add(["index.json", "duplicates.json"])
5540
+
self.repo.index.commit("Initial thicket repository structure")
5542
+
def _save_index(self, index: GitStoreIndex) -> None:
5543
+
"""Save the index to index.json."""
5544
+
index_path = self.repo_path / "index.json"
5545
+
with open(index_path, "w") as f:
5546
+
json.dump(index.model_dump(mode="json", exclude_none=True), f, indent=2, default=str)
5548
+
def _load_index(self) -> GitStoreIndex:
5549
+
"""Load the index from index.json."""
5550
+
index_path = self.repo_path / "index.json"
5551
+
if not index_path.exists():
5552
+
return GitStoreIndex(
5553
+
created=datetime.now(),
5554
+
last_updated=datetime.now(),
5557
+
with open(index_path) as f:
5558
+
data = json.load(f)
5560
+
return GitStoreIndex(**data)
5562
+
def _save_duplicates(self, duplicates: DuplicateMap) -> None:
    """Persist the duplicates map as duplicates.json in the repo root."""
    payload = duplicates.model_dump(exclude_none=True)
    with open(self.repo_path / "duplicates.json", "w") as f:
        json.dump(payload, f, indent=2)
5568
+
def _load_duplicates(self) -> DuplicateMap:
    """Load the duplicates map, returning an empty map if the file is absent."""
    path = self.repo_path / "duplicates.json"
    if not path.exists():
        return DuplicateMap()
    with open(path) as f:
        return DuplicateMap(**json.load(f))
5579
+
def add_user(self, username: str, display_name: Optional[str] = None,
5580
+
email: Optional[str] = None, homepage: Optional[str] = None,
5581
+
icon: Optional[str] = None, feeds: Optional[list[str]] = None) -> UserMetadata:
5582
+
"""Add a new user to the Git store."""
5583
+
index = self._load_index()
5585
+
# Create user directory
5586
+
user_dir = self.repo_path / username
5587
+
user_dir.mkdir(exist_ok=True)
5589
+
# Create user metadata
5590
+
user_metadata = UserMetadata(
5591
+
username=username,
5592
+
display_name=display_name,
5594
+
homepage=homepage,
5596
+
feeds=feeds or [],
5597
+
directory=username,
5598
+
created=datetime.now(),
5599
+
last_updated=datetime.now(),
5604
+
index.add_user(user_metadata)
5605
+
self._save_index(index)
5607
+
return user_metadata
5609
+
def get_user(self, username: str) -> Optional[UserMetadata]:
    """Return the user's metadata from the index, or None if unknown."""
    return self._load_index().get_user(username)
5614
+
def update_user(self, username: str, **kwargs) -> bool:
5615
+
"""Update user metadata."""
5616
+
index = self._load_index()
5617
+
user = index.get_user(username)
5622
+
# Update user metadata
5623
+
for key, value in kwargs.items():
5624
+
if hasattr(user, key) and value is not None:
5625
+
setattr(user, key, value)
5627
+
user.update_timestamp()
5631
+
index.add_user(user)
5632
+
self._save_index(index)
5636
+
def store_entry(self, username: str, entry: AtomEntry) -> bool:
5637
+
"""Store an entry in the user's directory."""
5638
+
user = self.get_user(username)
5642
+
# Sanitize entry ID for filename
5643
+
from .feed_parser import FeedParser
5644
+
parser = FeedParser()
5645
+
safe_id = parser.sanitize_entry_id(entry.id)
5647
+
# Create entry file
5648
+
user_dir = self.repo_path / user.directory
5649
+
entry_path = user_dir / f"{safe_id}.json"
5651
+
# Check if entry already exists
5652
+
entry_exists = entry_path.exists()
5655
+
with open(entry_path, "w") as f:
5656
+
json.dump(entry.model_dump(mode="json", exclude_none=True), f, indent=2, default=str)
5658
+
# Update user metadata if new entry
5659
+
if not entry_exists:
5660
+
index = self._load_index()
5661
+
index.update_entry_count(username, 1)
5662
+
self._save_index(index)
5666
+
def get_entry(self, username: str, entry_id: str) -> Optional[AtomEntry]:
5667
+
"""Get an entry by username and entry ID."""
5668
+
user = self.get_user(username)
5672
+
# Sanitize entry ID
5673
+
from .feed_parser import FeedParser
5674
+
parser = FeedParser()
5675
+
safe_id = parser.sanitize_entry_id(entry_id)
5677
+
entry_path = self.repo_path / user.directory / f"{safe_id}.json"
5678
+
if not entry_path.exists():
5681
+
with open(entry_path) as f:
5682
+
data = json.load(f)
5684
+
return AtomEntry(**data)
5686
+
def list_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]:
5687
+
"""List entries for a user."""
5688
+
user = self.get_user(username)
5692
+
user_dir = self.repo_path / user.directory
5693
+
if not user_dir.exists():
5697
+
entry_files = sorted(user_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
5701
+
entry_files = entry_files[:limit]
5703
+
for entry_file in entry_files:
5705
+
with open(entry_file) as f:
5706
+
data = json.load(f)
5707
+
entries.append(AtomEntry(**data))
5709
+
# Skip invalid entries
5714
+
def get_duplicates(self) -> DuplicateMap:
5715
+
"""Get the duplicates map."""
5716
+
return self._load_duplicates()
5718
+
def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None:
    """Record that *duplicate_id* resolves to *canonical_id* and persist it."""
    mapping = self._load_duplicates()
    mapping.add_duplicate(duplicate_id, canonical_id)
    self._save_duplicates(mapping)
5724
+
def remove_duplicate(self, duplicate_id: str) -> bool:
5725
+
"""Remove a duplicate mapping."""
5726
+
duplicates = self._load_duplicates()
5727
+
result = duplicates.remove_duplicate(duplicate_id)
5728
+
self._save_duplicates(duplicates)
5731
+
def commit_changes(self, message: str) -> None:
5732
+
"""Commit all changes to the Git repository."""
5737
+
self.repo.git.add(A=True)
5739
+
# Check if there are changes to commit
5740
+
if self.repo.index.diff("HEAD"):
5741
+
self.repo.index.commit(message)
5743
+
def get_stats(self) -> dict:
5744
+
"""Get statistics about the Git store."""
5745
+
index = self._load_index()
5746
+
duplicates = self._load_duplicates()
5749
+
"total_users": len(index.users),
5750
+
"total_entries": index.total_entries,
5751
+
"total_duplicates": len(duplicates.duplicates),
5752
+
"last_updated": index.last_updated,
5753
+
"repository_size": sum(f.stat().st_size for f in self.repo_path.rglob("*") if f.is_file()),
5756
+
def search_entries(self, query: str, username: Optional[str] = None,
5757
+
limit: Optional[int] = None) -> list[tuple[str, AtomEntry]]:
5758
+
"""Search entries by content."""
5761
+
# Get users to search
5762
+
index = self._load_index()
5763
+
users = [index.get_user(username)] if username else list(index.users.values())
5764
+
users = [u for u in users if u is not None]
5766
+
for user in users:
5767
+
user_dir = self.repo_path / user.directory
5768
+
if not user_dir.exists():
5771
+
entry_files = user_dir.glob("*.json")
5773
+
for entry_file in entry_files:
5775
+
with open(entry_file) as f:
5776
+
data = json.load(f)
5778
+
entry = AtomEntry(**data)
5780
+
# Simple text search in title, summary, and content
5781
+
searchable_text = " ".join(filter(None, [
5783
+
entry.summary or "",
5784
+
entry.content or "",
5787
+
if query.lower() in searchable_text:
5788
+
results.append((user.username, entry))
5790
+
if limit and len(results) >= limit:
5794
+
# Skip invalid entries
5797
+
# Sort by updated time (newest first)
5798
+
results.sort(key=lambda x: x[1].updated, reverse=True)
5800
+
return results[:limit] if limit else results
5803
+
<file path="ARCH.md">
5804
+
# Thicket Architecture Design
5807
+
Thicket is a modern CLI tool for persisting Atom/RSS feeds in a Git repository, designed to enable distributed webblog comment structures.
5809
+
## Technology Stack
5811
+
### Core Libraries
5813
+
#### CLI Framework
5814
+
- **Typer** (0.15.x) - Modern CLI framework with type hints
5815
+
- **Rich** (13.x) - Beautiful terminal output, progress bars, and tables
5816
+
- **prompt-toolkit** - Interactive prompts when needed
5818
+
#### Feed Processing
5819
+
- **feedparser** (6.0.11) - Universal feed parser supporting RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0
5820
+
- Alternative: **atoma** for stricter Atom/RSS parsing with JSON feed support
5821
+
- Alternative: **fastfeedparser** for high-performance parsing (10x faster)
5823
+
#### Git Integration
5824
+
- **GitPython** (3.1.44) - High-level git operations, requires git CLI
5825
+
- Alternative: **pygit2** (1.18.0) - Direct libgit2 bindings, better for authentication
5828
+
- **httpx** (0.28.x) - Modern async/sync HTTP client with connection pooling
5829
+
- **aiohttp** (3.11.x) - For async-only operations if needed
5831
+
#### Configuration & Data Models
5832
+
- **pydantic** (2.11.x) - Data validation and settings management
5833
+
- **pydantic-settings** (2.10.x) - Configuration file handling with env var support
5836
+
- **pendulum** (3.x) - Better datetime handling
5837
+
- **bleach** (6.x) - HTML sanitization for feed content
5838
+
- **platformdirs** (4.x) - Cross-platform directory paths
5840
+
## Project Structure
5844
+
โโโ pyproject.toml # Modern Python packaging
5845
+
โโโ README.md # Project documentation
5846
+
โโโ ARCH.md # This file
5847
+
โโโ CLAUDE.md # Project instructions
5848
+
โโโ .gitignore
5850
+
โ โโโ thicket/
5851
+
โ โโโ __init__.py
5852
+
โ โโโ __main__.py # Entry point for `python -m thicket`
5853
+
โ โโโ cli/ # CLI commands and interface
5854
+
โ โ โโโ __init__.py
5855
+
โ โ โโโ main.py # Main CLI app with Typer
5856
+
โ โ โโโ commands/ # Subcommands
5857
+
โ โ โ โโโ __init__.py
5858
+
โ โ โ โโโ init.py # Initialize git store
5859
+
โ โ โ โโโ add.py # Add users and feeds
5860
+
โ โ โ โโโ sync.py # Sync feeds
5861
+
โ โ โ โโโ list_cmd.py # List users/feeds
5862
+
โ โ โ โโโ duplicates.py # Manage duplicate entries
5863
+
โ โ โ โโโ links_cmd.py # Extract and categorize links
5864
+
โ โ โ โโโ index_cmd.py # Build reference index and show threads
5865
+
โ โ โโโ utils.py # CLI utilities (progress, formatting)
5866
+
โ โโโ core/ # Core business logic
5867
+
โ โ โโโ __init__.py
5868
+
โ โ โโโ feed_parser.py # Feed parsing and normalization
5869
+
โ โ โโโ git_store.py # Git repository operations
5870
+
โ โ โโโ reference_parser.py # Link extraction and threading
5871
+
โ โโโ models/ # Pydantic data models
5872
+
โ โ โโโ __init__.py
5873
+
โ โ โโโ config.py # Configuration models
5874
+
โ โ โโโ feed.py # Feed/Entry models
5875
+
โ โ โโโ user.py # User metadata models
5876
+
โ โโโ utils/ # Shared utilities
5877
+
โ โโโ __init__.py
5879
+
โ โโโ __init__.py
5880
+
โ โโโ conftest.py # pytest configuration
5881
+
โ โโโ test_feed_parser.py
5882
+
โ โโโ test_git_store.py
5883
+
โ โโโ fixtures/ # Test data
5884
+
โ โโโ feeds/
5886
+
โโโ examples/ # Example configurations
5891
+
### Configuration File (YAML/TOML)
5893
+
class ThicketConfig(BaseSettings):
5894
+
git_store: Path # Git repository location
5895
+
cache_dir: Path # Cache directory
5896
+
users: list[UserConfig]
5898
+
model_config = SettingsConfigDict(
5899
+
env_prefix="THICKET_",
5901
+
yaml_file="thicket.yaml"
5904
+
class UserConfig(BaseModel):
5906
+
feeds: list[HttpUrl]
5907
+
email: Optional[EmailStr] = None
5908
+
homepage: Optional[HttpUrl] = None
5909
+
icon: Optional[HttpUrl] = None
5910
+
display_name: Optional[str] = None
5913
+
### Feed Storage Format
5915
+
class AtomEntry(BaseModel):
5916
+
id: str # Original Atom ID
5920
+
published: Optional[datetime]
5921
+
summary: Optional[str]
5922
+
content: Optional[str] # Full body content from Atom entry
5923
+
content_type: Optional[str] = "html" # text, html, xhtml
5924
+
author: Optional[dict]
5925
+
categories: list[str] = []
5926
+
rights: Optional[str] = None # Copyright info
5927
+
source: Optional[str] = None # Source feed URL
5928
+
# Additional Atom fields preserved during RSS->Atom conversion
5930
+
model_config = ConfigDict(
5932
+
datetime: lambda v: v.isoformat()
5936
+
class DuplicateMap(BaseModel):
5937
+
"""Maps duplicate entry IDs to canonical entry IDs"""
5938
+
duplicates: dict[str, str] = {} # duplicate_id -> canonical_id
5939
+
comment: str = "Entry IDs that map to the same canonical content"
5941
+
def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None:
5942
+
"""Add a duplicate mapping"""
5943
+
self.duplicates[duplicate_id] = canonical_id
5945
+
def remove_duplicate(self, duplicate_id: str) -> bool:
5946
+
"""Remove a duplicate mapping. Returns True if existed."""
5947
+
return self.duplicates.pop(duplicate_id, None) is not None
5949
+
def get_canonical(self, entry_id: str) -> str:
5950
+
"""Get canonical ID for an entry (returns original if not duplicate)"""
5951
+
return self.duplicates.get(entry_id, entry_id)
5953
+
def is_duplicate(self, entry_id: str) -> bool:
5954
+
"""Check if entry ID is marked as duplicate"""
5955
+
return entry_id in self.duplicates
5958
+
## Git Repository Structure
5961
+
โโโ index.json # User directory index
5962
+
โโโ duplicates.json # Manual curation of duplicate entries
5963
+
โโโ links.json # Unified links, references, and mapping data
5965
+
โ โโโ entry_id_1.json # Sanitized entry files
5966
+
โ โโโ entry_id_2.json
5972
+
## Key Design Decisions
5974
+
### 1. Feed Normalization & Auto-Discovery
5975
+
- All RSS feeds converted to Atom format before storage
5976
+
- Preserves maximum metadata during conversion
5977
+
- Sanitizes HTML content to prevent XSS
5978
+
- **Auto-discovery**: Extracts user metadata from feed during `add user` command
5980
+
### 2. ID Sanitization
5981
+
- Consistent algorithm to convert Atom IDs to safe filenames
5982
+
- Handles edge cases (very long IDs, special characters)
5983
+
- Maintains reversibility where possible
5985
+
### 3. Git Operations
5986
+
- Uses GitPython for simplicity (no authentication required)
5987
+
- Single main branch for all users and entries
5988
+
- Atomic commits per sync operation
5989
+
- Meaningful commit messages with feed update summaries
5990
+
- Preserves complete history - never delete entries even if they disappear from feeds
5992
+
### 4. Caching Strategy
5993
+
- HTTP caching with Last-Modified/ETag support
5994
+
- Local cache of parsed feeds with TTL
5995
+
- Cache invalidation on configuration changes
5996
+
- Git store serves as permanent historical archive beyond feed depth limits
5998
+
### 5. Error Handling
5999
+
- Graceful handling of feed parsing errors
6000
+
- Retry logic for network failures
6001
+
- Clear error messages with recovery suggestions
6003
+
## CLI Command Structure
6006
+
# Initialize a new git store
6007
+
thicket init /path/to/store
6009
+
# Add a user with feeds (auto-discovers metadata from feed)
6010
+
thicket add user "alyssa" \
6011
+
--feed "https://example.com/feed.atom"
6012
+
# Auto-populates: email, homepage, icon, display_name from feed metadata
6014
+
# Add a user with manual overrides
6015
+
thicket add user "alyssa" \
6016
+
--feed "https://example.com/feed.atom" \
6017
+
--email "alyssa@example.com" \
6018
+
--homepage "https://alyssa.example.com" \
6019
+
--icon "https://example.com/avatar.png" \
6020
+
--display-name "Alyssa P. Hacker"
6022
+
# Add additional feed to existing user
6023
+
thicket add feed "alyssa" "https://example.com/other-feed.rss"
6025
+
# Sync all feeds (designed for cron usage)
6026
+
thicket sync --all
6028
+
# Sync specific user
6029
+
thicket sync --user alyssa
6031
+
# List users and their feeds
6032
+
thicket list users
6033
+
thicket list feeds --user alyssa
6035
+
# Manage duplicate entries
6036
+
thicket duplicates list
6037
+
thicket duplicates add <entry_id_1> <entry_id_2> # Mark as duplicates
6038
+
thicket duplicates remove <entry_id_1> <entry_id_2> # Unmark duplicates
6040
+
# Link processing and threading
6041
+
thicket links --verbose # Extract and categorize all links
6042
+
thicket index --verbose # Build reference index for threading
6043
+
thicket threads # Show conversation threads
6044
+
thicket threads --username user1 # Show threads for specific user
6045
+
thicket threads --min-size 3 # Show threads with minimum size
6048
+
## Performance Considerations
6050
+
1. **Concurrent Feed Fetching**: Use httpx with asyncio for parallel downloads
6051
+
2. **Incremental Updates**: Only fetch/parse feeds that have changed
6052
+
3. **Efficient Git Operations**: Batch commits, use shallow clones where appropriate
6053
+
4. **Progress Feedback**: Rich progress bars for long operations
6055
+
## Security Considerations
6057
+
1. **HTML Sanitization**: Use bleach to clean feed content
6058
+
2. **URL Validation**: Strict validation of feed URLs
6059
+
3. **Git Security**: No credentials stored in repository
6060
+
4. **Path Traversal**: Careful sanitization of filenames
6062
+
## Future Enhancements
6064
+
1. **Web Interface**: Optional web UI for browsing the git store
6065
+
2. **Webhooks**: Notify external services on feed updates
6066
+
3. **Feed Discovery**: Auto-discover feeds from HTML pages
6067
+
4. **Export Formats**: Generate static sites, OPML exports
6068
+
5. **Federation**: P2P sync between thicket instances
6070
+
## Requirements Clarification
6072
+
**✓ Resolved Requirements:**
6073
+
1. **Feed Update Frequency**: Designed for cron usage - no built-in scheduling needed
6074
+
2. **Duplicate Handling**: Manual curation via `duplicates.json` file with CLI commands
6075
+
3. **Git Branching**: Single main branch for all users and entries
6076
+
4. **Authentication**: No feeds require authentication currently
6077
+
5. **Content Storage**: Store complete Atom entry body content as provided
6078
+
6. **Deleted Entries**: Preserve all entries in Git store permanently (historical archive)
6079
+
7. **History Depth**: Git store maintains full history beyond feed depth limits
6080
+
8. **Feed Auto-Discovery**: Extract user metadata from feed during `add user` command
6082
+
## Duplicate Entry Management
6084
+
### Duplicate Detection Strategy
6085
+
- **Manual Curation**: Duplicates identified and managed manually via CLI
6086
+
- **Storage**: `duplicates.json` file in Git root maps entry IDs to canonical entries
6087
+
- **Structure**: `{"duplicate_id": "canonical_id", ...}`
6088
+
- **CLI Commands**: Add/remove duplicate mappings with validation
6089
+
- **Query Resolution**: Search/list commands resolve duplicates to canonical entries
6091
+
### Duplicate File Format
6094
+
"https://example.com/feed/entry/123": "https://canonical.com/posts/same-post",
6095
+
"https://mirror.com/articles/456": "https://canonical.com/posts/same-post",
6096
+
"comment": "Entry IDs that map to the same canonical content"
6100
+
## Feed Metadata Auto-Discovery
6102
+
### Extraction Strategy
6103
+
When adding a new user with `thicket add user`, the system fetches and parses the feed to extract:
6105
+
- **Display Name**: From `feed.title` or `feed.author.name`
6106
+
- **Email**: From `feed.author.email` or `feed.managingEditor`
6107
+
- **Homepage**: From `feed.link` or `feed.author.uri`
6108
+
- **Icon**: From `feed.logo`, `feed.icon`, or `feed.image.url`
6110
+
### Discovery Priority Order
6111
+
1. **Author Information**: Prefer `feed.author.*` fields (more specific to person)
6112
+
2. **Feed-Level**: Fall back to feed-level metadata
6113
+
3. **Manual Override**: CLI flags always take precedence over discovered values
6114
+
4. **Update Behavior**: Auto-discovery only runs during initial `add user`, not on sync
6116
+
### Extracted Metadata Format
6118
+
class FeedMetadata(BaseModel):
6119
+
title: Optional[str] = None
6120
+
author_name: Optional[str] = None
6121
+
author_email: Optional[EmailStr] = None
6122
+
author_uri: Optional[HttpUrl] = None
6123
+
link: Optional[HttpUrl] = None
6124
+
logo: Optional[HttpUrl] = None
6125
+
icon: Optional[HttpUrl] = None
6126
+
image_url: Optional[HttpUrl] = None
6128
+
def to_user_config(self, username: str, feed_url: HttpUrl) -> UserConfig:
6129
+
"""Convert discovered metadata to UserConfig with fallbacks"""
6130
+
return UserConfig(
6131
+
username=username,
6133
+
display_name=self.author_name or self.title,
6134
+
email=self.author_email,
6135
+
homepage=self.author_uri or self.link,
6136
+
icon=self.logo or self.icon or self.image_url
6140
+
## Link Processing and Threading Architecture
6143
+
The thicket system implements a sophisticated link processing and threading system to create email-style threaded views of blog entries by tracking cross-references between different blogs.
6145
+
### Link Processing Pipeline
6147
+
#### 1. Link Extraction (`thicket links`)
6148
+
The `links` command systematically extracts all outbound links from blog entries and categorizes them:
6151
+
class LinkData(BaseModel):
6152
+
url: str # Fully resolved URL
6153
+
entry_id: str # Source entry ID
6154
+
username: str # Source username
6155
+
context: str # Surrounding text context
6156
+
category: str # "internal", "user", or "unknown"
6157
+
target_username: Optional[str] # Target user if applicable
6160
+
**Link Categories:**
6161
+
- **Internal**: Links to the same user's domain (self-references)
6162
+
- **User**: Links to other tracked users' domains
6163
+
- **Unknown**: Links to external sites not tracked by thicket
6165
+
#### 2. URL Resolution
6166
+
All links are properly resolved using the Atom feed's base URL to handle:
6167
+
- Relative URLs (converted to absolute)
6168
+
- Protocol-relative URLs
6169
+
- Fragment identifiers
6170
+
- Redirects and canonical URLs
6172
+
#### 3. Domain Mapping
6173
+
The system builds a comprehensive domain mapping from user configuration:
6174
+
- Feed URLs → domain extraction
6175
+
- Homepage URLs → domain extraction
6176
+
- Reverse mapping: domain → username
6178
+
### Threading System
6180
+
#### 1. Reference Index Generation (`thicket index`)
6181
+
Creates a bidirectional reference index from the categorized links:
6184
+
class BlogReference(BaseModel):
6185
+
source_entry_id: str
6186
+
source_username: str
6188
+
target_username: Optional[str]
6189
+
target_entry_id: Optional[str]
6193
+
#### 2. Thread Detection Algorithm
6194
+
Uses graph traversal to find connected blog entries:
6195
+
- **Outbound references**: Links from an entry to other entries
6196
+
- **Inbound references**: Links to an entry from other entries
6197
+
- **Thread members**: All entries connected through references
6199
+
#### 3. Threading Display (`thicket threads`)
6200
+
Creates email-style threaded views:
6201
+
- Chronological ordering within threads
6202
+
- Reference counts (outbound/inbound)
6203
+
- Context preservation
6204
+
- Filtering options (user, entry, minimum size)
6206
+
### Data Structures
6208
+
#### links.json Format (Unified Structure)
6212
+
"https://example.com/post/123": {
6213
+
"referencing_entries": ["https://blog.user.com/entry/456"],
6214
+
"target_username": "user2"
6216
+
"https://external-site.com/article": {
6217
+
"referencing_entries": ["https://blog.user.com/entry/789"]
6220
+
"reverse_mapping": {
6221
+
"https://blog.user.com/entry/456": ["https://example.com/post/123"],
6222
+
"https://blog.user.com/entry/789": ["https://external-site.com/article"]
6226
+
"source_entry_id": "https://blog.user.com/entry/456",
6227
+
"source_username": "user1",
6228
+
"target_url": "https://example.com/post/123",
6229
+
"target_username": "user2",
6230
+
"target_entry_id": "https://example.com/post/123",
6231
+
"context": "As mentioned in this post..."
6235
+
"user1": ["blog.user.com"],
6236
+
"user2": ["example.com"]
6241
+
This unified structure eliminates duplication by:
6242
+
- Storing each URL only once with minimal metadata
6243
+
- Including all link data, reference data, and mappings in one file
6244
+
- Using presence of `target_username` to identify tracked vs external links
6245
+
- Providing bidirectional mappings for efficient queries
6247
+
### Unified Structure Benefits
6249
+
- **Eliminates Duplication**: Each URL appears only once with metadata
6250
+
- **Single Source of Truth**: All link-related data in one file
6251
+
- **Efficient Queries**: Fast lookups for both directions (URL→entries, entry→URLs)
6252
+
- **Atomic Updates**: All link data changes together
6253
+
- **Reduced I/O**: Fewer file operations
6255
+
### Implementation Benefits
6257
+
1. **Systematic Link Processing**: All links are extracted and categorized consistently
6258
+
2. **Proper URL Resolution**: Handles relative URLs and base URL resolution correctly
6259
+
3. **Domain-based Categorization**: Automatically identifies user-to-user references
6260
+
4. **Bidirectional Indexing**: Supports both "who links to whom" and "who is linked by whom"
6261
+
5. **Thread Discovery**: Finds conversation threads automatically
6262
+
6. **Rich Context**: Preserves surrounding text for each link
6263
+
7. **Performance**: Pre-computed indexes for fast threading queries
6268
+
# Extract and categorize all links
6269
+
thicket links --verbose
6271
+
# Build reference index for threading
6272
+
thicket index --verbose
6274
+
# Show all conversation threads
6277
+
# Show threads for specific user
6278
+
thicket threads --username user1
6280
+
# Show threads with minimum size
6281
+
thicket threads --min-size 3
6284
+
### Integration with Existing Commands
6286
+
The link processing system integrates seamlessly with existing thicket commands:
6287
+
- `thicket sync` updates entries, requiring `thicket links` to be run afterward
6288
+
- `thicket index` uses the output from `thicket links` for improved accuracy
6289
+
- `thicket threads` provides the user-facing threading interface
6291
+
## Current Implementation Status
6293
+
### ✅ Completed Features
6294
+
1. **Core Infrastructure**
6295
+
- Modern CLI with Typer and Rich
6296
+
- Pydantic data models for type safety
6297
+
- Git repository operations with GitPython
6298
+
- Feed parsing and normalization with feedparser
6300
+
2. **User and Feed Management**
6301
+
- `thicket init` - Initialize git store
6302
+
- `thicket add` - Add users and feeds with auto-discovery
6303
+
- `thicket sync` - Sync feeds with progress tracking
6304
+
- `thicket list` - List users, feeds, and entries
6305
+
- `thicket duplicates` - Manage duplicate entries
6307
+
3. **Link Processing and Threading**
6308
+
- `thicket links` - Extract and categorize all outbound links
6309
+
- `thicket index` - Build reference index from links
6310
+
- `thicket threads` - Display threaded conversation views
6311
+
- Proper URL resolution with base URL handling
6312
+
- Domain-based link categorization
6313
+
- Context preservation for links
6315
+
### ๐ System Performance
6316
+
- **Link Extraction**: Successfully processes thousands of blog entries
6317
+
- **Categorization**: Identifies internal, user, and unknown links
6318
+
- **Threading**: Creates email-style threaded views of conversations
6319
+
- **Storage**: Efficient JSON-based data structures for links and references
6321
+
### ๐ง Current Architecture Highlights
6322
+
- **Modular Design**: Clear separation between CLI, core logic, and models
6323
+
- **Type Safety**: Comprehensive Pydantic models for data validation
6324
+
- **Rich CLI**: Beautiful progress bars, tables, and error handling
6325
+
- **Extensible**: Easy to add new commands and features
6326
+
- **Git Integration**: All data stored in version-controlled JSON files
6328
+
### ๐ฏ Proven Functionality
6329
+
The system has been tested with real blog data and successfully:
6330
+
- Extracted 14,396 total links from blog entries
6331
+
- Categorized 3,994 internal links, 363 user-to-user links, and 10,039 unknown links
6332
+
- Built comprehensive domain mappings for 16 users across 20 domains
6333
+
- Generated threaded views showing blog conversation patterns
6335
+
### ๐ Ready for Use
6336
+
The thicket system is now fully functional for:
6337
+
- Maintaining Git repositories of blog feeds
6338
+
- Tracking cross-references between blogs
6339
+
- Creating threaded views of blog conversations
6340
+
- Discovering blog interaction patterns
6341
+
- Building distributed comment systems
6344
+
<file path="src/thicket/cli/utils.py">
6345
+
"""CLI utilities and helpers."""
6347
+
from pathlib import Path
6348
+
from typing import Optional
6351
+
from rich.console import Console
6352
+
from rich.progress import Progress, SpinnerColumn, TextColumn
6353
+
from rich.table import Table
6355
+
from ..models import ThicketConfig, UserMetadata
6356
+
from ..core.git_store import GitStore
6358
+
console = Console()
6361
+
def get_tsv_mode() -> bool:
6362
+
"""Get the global TSV mode setting."""
6363
+
from .main import tsv_mode
6367
+
def load_config(config_path: Optional[Path] = None) -> ThicketConfig:
6368
+
"""Load thicket configuration from file or environment."""
6369
+
if config_path and config_path.exists():
6372
+
with open(config_path) as f:
6373
+
config_data = yaml.safe_load(f)
6375
+
# Convert to ThicketConfig
6376
+
return ThicketConfig(**config_data)
6378
+
# Try to load from default locations or environment
6380
+
# First try to find thicket.yaml in current directory
6381
+
default_config = Path("thicket.yaml")
6382
+
if default_config.exists():
6384
+
with open(default_config) as f:
6385
+
config_data = yaml.safe_load(f)
6386
+
return ThicketConfig(**config_data)
6388
+
# Fall back to environment variables
6389
+
return ThicketConfig()
6390
+
except Exception as e:
6391
+
console.print(f"[red]Error loading configuration: {e}[/red]")
6392
+
console.print("[yellow]Run 'thicket init' to create a new configuration.[/yellow]")
6393
+
raise typer.Exit(1) from e
6396
+
def save_config(config: ThicketConfig, config_path: Path) -> None:
6397
+
"""Save thicket configuration to file."""
6400
+
config_data = config.model_dump(mode="json", exclude_none=True)
6402
+
# Convert Path objects to strings for YAML serialization
6403
+
config_data["git_store"] = str(config_data["git_store"])
6404
+
config_data["cache_dir"] = str(config_data["cache_dir"])
6406
+
with open(config_path, "w") as f:
6407
+
yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
6410
+
def create_progress() -> Progress:
6411
+
"""Create a Rich progress display."""
6414
+
TextColumn("[progress.description]{task.description}"),
6420
+
def print_users_table(config: ThicketConfig) -> None:
6421
+
"""Print a table of users and their feeds."""
6422
+
if get_tsv_mode():
6423
+
print_users_tsv(config)
6426
+
table = Table(title="Users and Feeds")
6427
+
table.add_column("Username", style="cyan", no_wrap=True)
6428
+
table.add_column("Display Name", style="magenta")
6429
+
table.add_column("Email", style="blue")
6430
+
table.add_column("Homepage", style="green")
6431
+
table.add_column("Feeds", style="yellow")
6433
+
for user in config.users:
6434
+
feeds_str = "\n".join(str(feed) for feed in user.feeds)
6437
+
user.display_name or "",
6439
+
str(user.homepage) if user.homepage else "",
6443
+
console.print(table)
6446
+
def print_feeds_table(config: ThicketConfig, username: Optional[str] = None) -> None:
6447
+
"""Print a table of feeds, optionally filtered by username."""
6448
+
if get_tsv_mode():
6449
+
print_feeds_tsv(config, username)
6452
+
table = Table(title=f"Feeds{f' for {username}' if username else ''}")
6453
+
table.add_column("Username", style="cyan", no_wrap=True)
6454
+
table.add_column("Feed URL", style="blue")
6455
+
table.add_column("Status", style="green")
6457
+
users = [config.find_user(username)] if username else config.users
6458
+
users = [u for u in users if u is not None]
6460
+
for user in users:
6461
+
for feed in user.feeds:
6465
+
"Active", # TODO: Add actual status checking
6468
+
console.print(table)
6471
+
def confirm_action(message: str, default: bool = False) -> bool:
    """Ask the user a yes/no question and return their answer."""
    # typer.confirm handles the interactive prompt and default selection.
    answer = typer.confirm(message, default=default)
    return answer
6476
+
def print_success(message: str) -> None:
    """Print a success message prefixed with a green check mark."""
    # Fix: the check-mark glyph had been corrupted by a UTF-8 mis-encoding
    # (only the lead byte of U+2713 survived); restore the intended "✓".
    console.print(f"[green]✓[/green] {message}")
6481
+
def print_error(message: str) -> None:
    """Print an error message prefixed with a red cross mark."""
    # Fix: the cross glyph had been corrupted by a UTF-8 mis-encoding
    # (only the lead byte of U+2717 survived); restore the intended "✗".
    console.print(f"[red]✗[/red] {message}")
6486
+
def print_warning(message: str) -> None:
    """Print a warning message prefixed with a yellow warning sign."""
    # Fix: the warning glyph had been corrupted by a UTF-8 mis-encoding
    # (residue of U+26A0); restore the intended "⚠".
    console.print(f"[yellow]⚠[/yellow] {message}")
6491
+
def print_info(message: str) -> None:
    """Print an informational message prefixed with a blue info symbol."""
    # Fix: the info glyph had been corrupted by a UTF-8 mis-encoding —
    # the residue "โน" matches the bytes of U+2139; restore "ℹ".
    console.print(f"[blue]ℹ[/blue] {message}")
6496
+
def print_users_table_from_git(users: list[UserMetadata]) -> None:
6497
+
"""Print a table of users from git repository."""
6498
+
if get_tsv_mode():
6499
+
print_users_tsv_from_git(users)
6502
+
table = Table(title="Users and Feeds")
6503
+
table.add_column("Username", style="cyan", no_wrap=True)
6504
+
table.add_column("Display Name", style="magenta")
6505
+
table.add_column("Email", style="blue")
6506
+
table.add_column("Homepage", style="green")
6507
+
table.add_column("Feeds", style="yellow")
6509
+
for user in users:
6510
+
feeds_str = "\n".join(user.feeds)
6513
+
user.display_name or "",
6515
+
user.homepage or "",
6519
+
console.print(table)
6522
+
def print_feeds_table_from_git(git_store: GitStore, username: Optional[str] = None) -> None:
6523
+
"""Print a table of feeds from git repository."""
6524
+
if get_tsv_mode():
6525
+
print_feeds_tsv_from_git(git_store, username)
6528
+
table = Table(title=f"Feeds{f' for {username}' if username else ''}")
6529
+
table.add_column("Username", style="cyan", no_wrap=True)
6530
+
table.add_column("Feed URL", style="blue")
6531
+
table.add_column("Status", style="green")
6534
+
user = git_store.get_user(username)
6535
+
users = [user] if user else []
6537
+
index = git_store._load_index()
6538
+
users = list(index.users.values())
6540
+
for user in users:
6541
+
for feed in user.feeds:
6545
+
"Active", # TODO: Add actual status checking
6548
+
console.print(table)
6551
+
def print_users_tsv(config: ThicketConfig) -> None:
    """Emit one tab-separated row per configured user to stdout."""
    print("Username\tDisplay Name\tEmail\tHomepage\tFeeds")
    for member in config.users:
        # Feeds are collapsed into a single comma-separated column.
        joined_feeds = ",".join(str(feed_url) for feed_url in member.feeds)
        row = (
            f"{member.username}\t{member.display_name or ''}\t"
            f"{member.email or ''}\t{member.homepage or ''}\t{joined_feeds}"
        )
        print(row)
6559
+
def print_users_tsv_from_git(users: list[UserMetadata]) -> None:
    """Emit one tab-separated row per git-store user to stdout."""
    print("Username\tDisplay Name\tEmail\tHomepage\tFeeds")
    for member in users:
        # Feed URLs are already strings here; join them into one column.
        joined_feeds = ",".join(member.feeds)
        row = (
            f"{member.username}\t{member.display_name or ''}\t"
            f"{member.email or ''}\t{member.homepage or ''}\t{joined_feeds}"
        )
        print(row)
6567
+
def print_feeds_tsv(config: ThicketConfig, username: Optional[str] = None) -> None:
    """Emit one tab-separated row per feed, optionally limited to one user."""
    print("Username\tFeed URL\tStatus")
    if username:
        # A lookup miss yields [None]; the guard below drops it.
        selected = [config.find_user(username)]
    else:
        selected = config.users
    for member in selected:
        if member is None:
            continue
        for feed_url in member.feeds:
            print(f"{member.username}\t{feed_url}\tActive")
6578
+
def print_feeds_tsv_from_git(git_store: GitStore, username: Optional[str] = None) -> None:
6579
+
"""Print feeds from git repository in TSV format."""
6580
+
print("Username\tFeed URL\tStatus")
6583
+
user = git_store.get_user(username)
6584
+
users = [user] if user else []
6586
+
index = git_store._load_index()
6587
+
users = list(index.users.values())
6589
+
for user in users:
6590
+
for feed in user.feeds:
6591
+
print(f"{user.username}\t{feed}\tActive")
6594
+
def print_entries_tsv(entries_by_user: list[list], usernames: list[str]) -> None:
6595
+
"""Print entries in TSV format."""
6596
+
print("User\tAtom ID\tTitle\tUpdated\tURL")
6598
+
# Combine all entries with usernames
6600
+
for entries, username in zip(entries_by_user, usernames):
6601
+
for entry in entries:
6602
+
all_entries.append((username, entry))
6604
+
# Sort by updated time (newest first)
6605
+
all_entries.sort(key=lambda x: x[1].updated, reverse=True)
6607
+
for username, entry in all_entries:
6608
+
# Format updated time
6609
+
updated_str = entry.updated.strftime("%Y-%m-%d %H:%M")
6611
+
# Escape tabs and newlines in title to preserve TSV format
6612
+
title = entry.title.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
6614
+
print(f"{username}\t{entry.id}\t{title}\t{updated_str}\t{entry.link}")