···
1
-
This file is a merged representation of the entire codebase, combined into a single document by Repomix.
4
-
This section contains a summary of this file.
7
-
This file contains a packed representation of the entire repository's contents.
8
-
It is designed to be easily consumable by AI systems for analysis, code review,
9
-
or other automated processes.
13
-
The content is organized as follows:
14
-
1. This summary section
15
-
2. Repository information
16
-
3. Directory structure
17
-
4. Repository files (if enabled)
18
-
5. Multiple file entries, each consisting of:
19
-
- File path as an attribute
20
-
- Full contents of the file
24
-
- This file should be treated as read-only. Any changes should be made to the
25
-
original repository files, not this packed version.
26
-
- When processing this file, use the file path to distinguish
27
-
between different files in the repository.
28
-
- Be aware that this file may contain sensitive information. Handle it with
29
-
the same level of security as you would the original repository.
33
-
- Some files may have been excluded based on .gitignore rules and Repomix's configuration
34
-
- Binary files are not included in this packed representation. Please refer to the Repository Structure section for a complete list of file paths, including binary files
35
-
- Files matching patterns in .gitignore are excluded
36
-
- Files matching default ignore patterns are excluded
37
-
- Files are sorted by Git change count (files with more changes are at the bottom)
42
-
<directory_structure>
89
-
</directory_structure>
92
-
This section contains the contents of the repository's files.
94
-
<file path=".claude/settings.local.json">
106
-
"enableAllProjectMcpServers": false
110
-
<file path="src/thicket/cli/commands/generate.py">
111
-
"""Generate static HTML website from thicket data."""
117
-
from datetime import datetime
118
-
from pathlib import Path
119
-
from typing import Any, Optional, TypedDict, Union
122
-
from jinja2 import Environment, FileSystemLoader, select_autoescape
123
-
from rich.progress import Progress, SpinnerColumn, TextColumn
125
-
from ...core.git_store import GitStore
126
-
from ...models.feed import AtomEntry
127
-
from ...models.user import GitStoreIndex, UserMetadata
128
-
from ..main import app
129
-
from ..utils import console, load_config
132
-
class UserData(TypedDict):
133
-
"""Type definition for user data structure."""
135
-
metadata: UserMetadata
136
-
recent_entries: list[tuple[str, AtomEntry]]
139
-
def safe_anchor_id(atom_id: str) -> str:
140
-
"""Convert an Atom ID to a safe HTML anchor ID."""
141
-
# Use base64 URL-safe encoding without padding
142
-
encoded = base64.urlsafe_b64encode(atom_id.encode('utf-8')).decode('ascii').rstrip('=')
143
-
# Prefix with 'id' to ensure it starts with a letter (HTML requirement)
144
-
return f"id{encoded}"
147
-
class WebsiteGenerator:
148
-
"""Generate static HTML website from thicket data."""
150
-
def __init__(self, git_store: GitStore, output_dir: Path):
151
-
self.git_store = git_store
152
-
self.output_dir = output_dir
153
-
self.template_dir = Path(__file__).parent.parent.parent / "templates"
155
-
# Initialize Jinja2 environment
156
-
self.env = Environment(
157
-
loader=FileSystemLoader(self.template_dir),
158
-
autoescape=select_autoescape(["html", "xml"]),
162
-
self.index: Optional[GitStoreIndex] = None
163
-
self.entries: list[tuple[str, AtomEntry]] = [] # (username, entry)
164
-
self.links_data: Optional[dict[str, Any]] = None
165
-
self.threads: list[list[dict[str, Any]]] = [] # List of threads with metadata
167
-
def get_display_name(self, username: str) -> str:
168
-
"""Get display name for a user, falling back to username."""
169
-
if self.index and username in self.index.users:
170
-
user = self.index.users[username]
171
-
return user.display_name or username
174
-
def get_user_homepage(self, username: str) -> Optional[str]:
175
-
"""Get homepage URL for a user."""
176
-
if self.index and username in self.index.users:
177
-
user = self.index.users[username]
178
-
return str(user.homepage) if user.homepage else None
181
-
def clean_html_summary(self, content: Optional[str], max_length: int = 200) -> str:
182
-
"""Clean HTML content and truncate for display in timeline."""
187
-
clean_text = re.sub(r"<[^>]+>", " ", content)
188
-
# Replace multiple whitespace with single space
189
-
clean_text = re.sub(r"\s+", " ", clean_text)
190
-
# Strip leading/trailing whitespace
191
-
clean_text = clean_text.strip()
193
-
# Truncate with ellipsis if needed
194
-
if len(clean_text) > max_length:
195
-
# Try to break at word boundary
196
-
truncated = clean_text[:max_length]
197
-
last_space = truncated.rfind(" ")
199
-
last_space > max_length * 0.8
200
-
): # If we can break reasonably close to the limit
201
-
clean_text = truncated[:last_space] + "..."
203
-
clean_text = truncated + "..."
207
-
def load_data(self) -> None:
208
-
"""Load all data from the git repository."""
211
-
TextColumn("[progress.description]{task.description}"),
215
-
task = progress.add_task("Loading repository index...", total=None)
216
-
self.index = self.git_store._load_index()
218
-
raise ValueError("No index found in repository")
219
-
progress.update(task, completed=True)
222
-
task = progress.add_task("Loading entries...", total=None)
223
-
for username, user_metadata in self.index.users.items():
224
-
user_dir = self.git_store.repo_path / user_metadata.directory
225
-
if user_dir.exists():
226
-
for entry_file in user_dir.glob("*.json"):
227
-
if entry_file.name not in ["index.json", "duplicates.json"]:
229
-
with open(entry_file) as f:
230
-
entry_data = json.load(f)
231
-
entry = AtomEntry(**entry_data)
232
-
self.entries.append((username, entry))
233
-
except Exception as e:
235
-
f"[yellow]Warning: Failed to load {entry_file}: {e}[/yellow]"
237
-
progress.update(task, completed=True)
239
-
# Sort entries by date (newest first) - prioritize updated over published
241
-
key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True
245
-
task = progress.add_task("Loading links and references...", total=None)
246
-
links_file = self.git_store.repo_path / "links.json"
247
-
if links_file.exists():
248
-
with open(links_file) as f:
249
-
self.links_data = json.load(f)
250
-
progress.update(task, completed=True)
252
-
def build_threads(self) -> None:
253
-
"""Build threaded conversations from references."""
254
-
if not self.links_data or "references" not in self.links_data:
257
-
# Map entry IDs to (username, entry) tuples
258
-
entry_map: dict[str, tuple[str, AtomEntry]] = {}
259
-
for username, entry in self.entries:
260
-
entry_map[entry.id] = (username, entry)
262
-
# Build adjacency lists for references
263
-
self.outbound_refs: dict[str, set[str]] = {}
264
-
self.inbound_refs: dict[str, set[str]] = {}
265
-
self.reference_details: dict[
266
-
str, list[dict[str, Any]]
267
-
] = {} # Store full reference info
269
-
for ref in self.links_data["references"]:
270
-
source_id = ref["source_entry_id"]
271
-
target_id = ref.get("target_entry_id")
273
-
if target_id and source_id in entry_map and target_id in entry_map:
274
-
self.outbound_refs.setdefault(source_id, set()).add(target_id)
275
-
self.inbound_refs.setdefault(target_id, set()).add(source_id)
277
-
# Store reference details for UI
278
-
self.reference_details.setdefault(source_id, []).append(
280
-
"target_id": target_id,
281
-
"target_username": ref.get("target_username"),
282
-
"type": "outbound",
285
-
self.reference_details.setdefault(target_id, []).append(
287
-
"source_id": source_id,
288
-
"source_username": ref.get("source_username"),
293
-
# Find conversation threads (multi-post discussions)
296
-
for entry_id, (_username, _entry) in entry_map.items():
297
-
if entry_id in processed:
300
-
# Build thread starting from this entry
302
-
to_visit = [entry_id]
304
-
level_map: dict[str, int] = {} # Track levels for this thread
306
-
# First, traverse up to find the root
308
-
while current in self.inbound_refs:
309
-
parents = self.inbound_refs[current] - {
311
-
} # Exclude self-references
314
-
# Take the first parent
315
-
parent = next(iter(parents))
316
-
if parent in thread_ids: # Avoid cycles
319
-
to_visit.insert(0, current)
321
-
# Now traverse down from the root
323
-
current = to_visit.pop(0)
324
-
if current in thread_ids or current not in entry_map:
327
-
thread_ids.add(current)
328
-
username, entry = entry_map[current]
330
-
# Calculate thread level
331
-
thread_level = self._calculate_thread_level(current, level_map)
333
-
# Add threading metadata
335
-
"username": username,
336
-
"display_name": self.get_display_name(username),
338
-
"entry_id": current,
339
-
"references_to": list(self.outbound_refs.get(current, [])),
340
-
"referenced_by": list(self.inbound_refs.get(current, [])),
341
-
"thread_level": thread_level,
343
-
thread.append(thread_entry)
344
-
processed.add(current)
347
-
if current in self.outbound_refs:
348
-
children = self.outbound_refs[current] - thread_ids # Avoid cycles
349
-
to_visit.extend(sorted(children))
351
-
if len(thread) > 1: # Only keep actual threads
352
-
# Sort thread by date (newest first) - prioritize updated over published
353
-
thread.sort(key=lambda x: x["entry"].updated or x["entry"].published or datetime.min, reverse=True) # type: ignore
354
-
self.threads.append(thread)
356
-
# Sort threads by the date of their most recent entry - prioritize updated over published
359
-
item["entry"].updated or item["entry"].published or datetime.min for item in t
364
-
def _calculate_thread_level(
365
-
self, entry_id: str, processed_entries: dict[str, int]
367
-
"""Calculate indentation level for threaded display."""
368
-
if entry_id in processed_entries:
369
-
return processed_entries[entry_id]
371
-
if entry_id not in self.inbound_refs:
372
-
processed_entries[entry_id] = 0
375
-
parents_in_thread = self.inbound_refs[entry_id] & set(processed_entries.keys())
376
-
if not parents_in_thread:
377
-
processed_entries[entry_id] = 0
380
-
# Find the deepest parent level + 1
381
-
max_parent_level = 0
382
-
for parent_id in parents_in_thread:
383
-
parent_level = self._calculate_thread_level(parent_id, processed_entries)
384
-
max_parent_level = max(max_parent_level, parent_level)
386
-
level = min(max_parent_level + 1, 4) # Cap at level 4
387
-
processed_entries[entry_id] = level
390
-
def get_standalone_references(self) -> list[dict[str, Any]]:
391
-
"""Get posts that have references but aren't part of multi-post threads."""
392
-
if not hasattr(self, "reference_details"):
395
-
threaded_entry_ids = set()
396
-
for thread in self.threads:
397
-
for item in thread:
398
-
threaded_entry_ids.add(item["entry_id"])
400
-
standalone_refs = []
401
-
for username, entry in self.entries:
403
-
entry.id in self.reference_details
404
-
and entry.id not in threaded_entry_ids
406
-
refs = self.reference_details[entry.id]
407
-
# Only include if it has meaningful references (not just self-references)
408
-
meaningful_refs = [
411
-
if r.get("target_id") != entry.id and r.get("source_id") != entry.id
413
-
if meaningful_refs:
414
-
standalone_refs.append(
416
-
"username": username,
417
-
"display_name": self.get_display_name(username),
419
-
"references": meaningful_refs,
423
-
return standalone_refs
425
-
def _add_cross_thread_links(self, timeline_items: list[dict[str, Any]]) -> None:
426
-
"""Add cross-thread linking for entries that appear in multiple threads."""
427
-
# Map entry IDs to their positions in the timeline
428
-
entry_positions: dict[str, list[int]] = {}
429
-
# Map URLs referenced by entries to the entries that reference them
430
-
url_references: dict[str, list[tuple[str, int]]] = {} # url -> [(entry_id, position)]
432
-
# First pass: collect all entry IDs, their positions, and referenced URLs
433
-
for i, item in enumerate(timeline_items):
434
-
if item["type"] == "post":
435
-
entry_id = item["content"]["entry"].id
436
-
entry_positions.setdefault(entry_id, []).append(i)
437
-
# Track URLs this entry references
438
-
if entry_id in self.reference_details:
439
-
for ref in self.reference_details[entry_id]:
440
-
if ref["type"] == "outbound" and "target_id" in ref:
441
-
# Find the target entry's URL if available
442
-
target_entry = self._find_entry_by_id(ref["target_id"])
443
-
if target_entry and target_entry.link:
444
-
url = str(target_entry.link)
445
-
url_references.setdefault(url, []).append((entry_id, i))
446
-
elif item["type"] == "thread":
447
-
for thread_item in item["content"]:
448
-
entry_id = thread_item["entry"].id
449
-
entry_positions.setdefault(entry_id, []).append(i)
450
-
# Track URLs this entry references
451
-
if entry_id in self.reference_details:
452
-
for ref in self.reference_details[entry_id]:
453
-
if ref["type"] == "outbound" and "target_id" in ref:
454
-
target_entry = self._find_entry_by_id(ref["target_id"])
455
-
if target_entry and target_entry.link:
456
-
url = str(target_entry.link)
457
-
url_references.setdefault(url, []).append((entry_id, i))
459
-
# Build cross-thread connections - only for entries that actually appear multiple times
460
-
cross_thread_connections: dict[str, set[int]] = {} # entry_id -> set of timeline positions
462
-
# Add connections ONLY for entries that appear multiple times in the timeline
463
-
for entry_id, positions in entry_positions.items():
464
-
if len(positions) > 1:
465
-
cross_thread_connections[entry_id] = set(positions)
466
-
# Debug: uncomment to see which entries have multiple appearances
467
-
# print(f"Entry {entry_id[:50]}... appears at positions: {positions}")
469
-
# Apply cross-thread links to timeline items
470
-
for entry_id, positions_set in cross_thread_connections.items():
471
-
positions_list = list(positions_set)
472
-
for pos in positions_list:
473
-
item = timeline_items[pos]
474
-
other_positions = sorted([p for p in positions_list if p != pos])
476
-
if item["type"] == "post":
477
-
# Add cross-thread info to individual posts
478
-
item["content"]["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items)
479
-
# Add info about shared references
480
-
item["content"]["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items)
481
-
elif item["type"] == "thread":
482
-
# Add cross-thread info to thread items
483
-
for thread_item in item["content"]:
484
-
if thread_item["entry"].id == entry_id:
485
-
thread_item["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items)
486
-
thread_item["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items)
489
-
def _build_cross_thread_link_data(self, entry_id: str, other_positions: list[int], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
490
-
"""Build detailed cross-thread link data with anchor information."""
491
-
cross_thread_links = []
493
-
for pos in other_positions:
494
-
item = timeline_items[pos]
495
-
if item["type"] == "post":
496
-
# For individual posts
497
-
safe_id = safe_anchor_id(entry_id)
498
-
cross_thread_links.append({
500
-
"anchor_id": f"post-{pos}-{safe_id}",
501
-
"context": "individual post",
502
-
"title": item["content"]["entry"].title
504
-
elif item["type"] == "thread":
505
-
# For thread items, find the specific thread item
506
-
for thread_idx, thread_item in enumerate(item["content"]):
507
-
if thread_item["entry"].id == entry_id:
508
-
safe_id = safe_anchor_id(entry_id)
509
-
cross_thread_links.append({
511
-
"anchor_id": f"post-{pos}-{thread_idx}-{safe_id}",
512
-
"context": f"thread (level {thread_item.get('thread_level', 0)})",
513
-
"title": thread_item["entry"].title
517
-
return cross_thread_links
519
-
def _find_entry_by_id(self, entry_id: str) -> Optional[AtomEntry]:
520
-
"""Find an entry by its ID."""
521
-
for _username, entry in self.entries:
522
-
if entry.id == entry_id:
526
-
def _get_shared_references(self, entry_id: str, positions: Union[set[int], list[int]], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
527
-
"""Get information about shared references between cross-thread entries."""
530
-
# Collect all referenced URLs from entries at these positions
531
-
url_counts: dict[str, int] = {}
532
-
referencing_entries: dict[str, list[str]] = {} # url -> [entry_ids]
534
-
for pos in positions:
535
-
item = timeline_items[pos]
536
-
entries_to_check = []
538
-
if item["type"] == "post":
539
-
entries_to_check.append(item["content"]["entry"])
540
-
elif item["type"] == "thread":
541
-
entries_to_check.extend([ti["entry"] for ti in item["content"]])
543
-
for entry in entries_to_check:
544
-
if entry.id in self.reference_details:
545
-
for ref in self.reference_details[entry.id]:
546
-
if ref["type"] == "outbound" and "target_id" in ref:
547
-
target_entry = self._find_entry_by_id(ref["target_id"])
548
-
if target_entry and target_entry.link:
549
-
url = str(target_entry.link)
550
-
url_counts[url] = url_counts.get(url, 0) + 1
551
-
if url not in referencing_entries:
552
-
referencing_entries[url] = []
553
-
if entry.id not in referencing_entries[url]:
554
-
referencing_entries[url].append(entry.id)
556
-
# Find URLs referenced by multiple entries
557
-
for url, count in url_counts.items():
558
-
if count > 1 and len(referencing_entries[url]) > 1:
559
-
# Get the target entry info
560
-
target_entry = None
561
-
target_username = None
562
-
for ref in (self.links_data or {}).get("references", []):
563
-
if ref.get("target_url") == url:
564
-
target_username = ref.get("target_username")
565
-
if ref.get("target_entry_id"):
566
-
target_entry = self._find_entry_by_id(ref["target_entry_id"])
569
-
shared_refs.append({
572
-
"referencing_entries": referencing_entries[url],
573
-
"target_username": target_username,
574
-
"target_title": target_entry.title if target_entry else None
577
-
return sorted(shared_refs, key=lambda x: x["count"], reverse=True)
579
-
def generate_site(self) -> None:
580
-
"""Generate the static website."""
581
-
# Create output directory
582
-
self.output_dir.mkdir(parents=True, exist_ok=True)
584
-
# Create static directories
585
-
(self.output_dir / "css").mkdir(exist_ok=True)
586
-
(self.output_dir / "js").mkdir(exist_ok=True)
589
-
css_template = self.env.get_template("style.css")
590
-
css_content = css_template.render()
591
-
with open(self.output_dir / "css" / "style.css", "w") as f:
592
-
f.write(css_content)
594
-
# Generate JavaScript
595
-
js_template = self.env.get_template("script.js")
596
-
js_content = js_template.render()
597
-
with open(self.output_dir / "js" / "script.js", "w") as f:
598
-
f.write(js_content)
600
-
# Prepare common template data
602
-
"title": "Energy & Environment Group",
603
-
"generated_at": datetime.now().isoformat(),
604
-
"get_display_name": self.get_display_name,
605
-
"get_user_homepage": self.get_user_homepage,
606
-
"clean_html_summary": self.clean_html_summary,
607
-
"safe_anchor_id": safe_anchor_id,
610
-
# Build unified timeline
611
-
timeline_items = []
613
-
# Only consider the threads that will actually be displayed
614
-
displayed_threads = self.threads[:20] # Limit to 20 threads
616
-
# Track which entries are part of displayed threads
617
-
threaded_entry_ids = set()
618
-
for thread in displayed_threads:
619
-
for item in thread:
620
-
threaded_entry_ids.add(item["entry_id"])
622
-
# Add threads to timeline (using the date of the most recent post)
623
-
for thread in displayed_threads:
624
-
most_recent_date = max(
625
-
item["entry"].updated or item["entry"].published or datetime.min
628
-
timeline_items.append({
630
-
"date": most_recent_date,
634
-
# Add individual posts (not in threads)
635
-
for username, entry in self.entries[:50]:
636
-
if entry.id not in threaded_entry_ids:
637
-
# Check if this entry has references
639
-
entry.id in self.reference_details
640
-
if hasattr(self, "reference_details")
646
-
refs = self.reference_details.get(entry.id, [])
649
-
if r.get("target_id") != entry.id
650
-
and r.get("source_id") != entry.id
653
-
timeline_items.append({
655
-
"date": entry.updated or entry.published or datetime.min,
657
-
"username": username,
658
-
"display_name": self.get_display_name(username),
660
-
"references": refs if refs else None
664
-
# Sort unified timeline by date (newest first)
665
-
timeline_items.sort(key=lambda x: x["date"], reverse=True)
667
-
# Limit timeline to what will actually be rendered
668
-
timeline_items = timeline_items[:50] # Limit to 50 items total
670
-
# Add cross-thread linking for repeat blog references
671
-
self._add_cross_thread_links(timeline_items)
673
-
# Prepare outgoing links data
674
-
outgoing_links = []
675
-
if self.links_data and "links" in self.links_data:
676
-
for url, link_info in self.links_data["links"].items():
677
-
referencing_entries = []
678
-
for entry_id in link_info.get("referencing_entries", []):
679
-
for username, entry in self.entries:
680
-
if entry.id == entry_id:
681
-
referencing_entries.append(
682
-
(self.get_display_name(username), entry)
686
-
if referencing_entries:
687
-
# Sort by date - prioritize updated over published
688
-
referencing_entries.sort(
689
-
key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True
691
-
outgoing_links.append(
694
-
"target_username": link_info.get("target_username"),
695
-
"entries": referencing_entries,
699
-
# Sort links by most recent reference - prioritize updated over published
700
-
outgoing_links.sort(
701
-
key=lambda x: x["entries"][0][1].updated
702
-
or x["entries"][0][1].published or datetime.min,
706
-
# Prepare users data
707
-
users: list[UserData] = []
709
-
for username, user_metadata in self.index.users.items():
710
-
# Get recent entries for this user with display names
712
-
(self.get_display_name(u), e)
713
-
for u, e in self.entries
717
-
{"metadata": user_metadata, "recent_entries": user_entries}
719
-
# Sort by entry count
720
-
users.sort(key=lambda x: x["metadata"].entry_count, reverse=True)
722
-
# Generate timeline page
723
-
timeline_template = self.env.get_template("timeline.html")
724
-
timeline_content = timeline_template.render(
727
-
timeline_items=timeline_items, # Already limited above
729
-
with open(self.output_dir / "timeline.html", "w") as f:
730
-
f.write(timeline_content)
732
-
# Generate links page
733
-
links_template = self.env.get_template("links.html")
734
-
links_content = links_template.render(
737
-
outgoing_links=outgoing_links[:100],
739
-
with open(self.output_dir / "links.html", "w") as f:
740
-
f.write(links_content)
742
-
# Generate users page
743
-
users_template = self.env.get_template("users.html")
744
-
users_content = users_template.render(
749
-
with open(self.output_dir / "users.html", "w") as f:
750
-
f.write(users_content)
752
-
# Generate main index page (redirect to timeline)
753
-
index_template = self.env.get_template("index.html")
754
-
index_content = index_template.render(**base_data)
755
-
with open(self.output_dir / "index.html", "w") as f:
756
-
f.write(index_content)
758
-
console.print(f"[green]โ[/green] Generated website at {self.output_dir}")
759
-
console.print(f" - {len(self.entries)} entries")
760
-
console.print(f" - {len(self.threads)} conversation threads")
761
-
console.print(f" - {len(outgoing_links)} outgoing links")
762
-
console.print(f" - {len(users)} users")
764
-
" - Generated pages: index.html, timeline.html, links.html, users.html"
770
-
output: Path = typer.Option(
771
-
Path("./thicket-site"),
774
-
help="Output directory for the generated website",
776
-
force: bool = typer.Option(
777
-
False, "--force", "-f", help="Overwrite existing output directory"
779
-
config_file: Path = typer.Option(
780
-
Path("thicket.yaml"), "--config", help="Configuration file path"
783
-
"""Generate a static HTML website from thicket data."""
784
-
config = load_config(config_file)
786
-
if not config.git_store:
787
-
console.print("[red]No git store path configured[/red]")
788
-
raise typer.Exit(1)
790
-
git_store = GitStore(config.git_store)
792
-
# Check if output directory exists
793
-
if output.exists() and not force:
795
-
f"[red]Output directory {output} already exists. Use --force to overwrite.[/red]"
797
-
raise typer.Exit(1)
799
-
# Clean output directory if forcing
800
-
if output.exists() and force:
801
-
shutil.rmtree(output)
804
-
generator = WebsiteGenerator(git_store, output)
806
-
console.print("[bold]Generating static website...[/bold]")
807
-
generator.load_data()
808
-
generator.build_threads()
809
-
generator.generate_site()
811
-
except Exception as e:
812
-
console.print(f"[red]Error generating website: {e}[/red]")
813
-
raise typer.Exit(1) from e
816
-
<file path="src/thicket/templates/base.html">
820
-
<meta charset="UTF-8">
821
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
822
-
<title>{% block page_title %}{{ title }}{% endblock %}</title>
823
-
<link rel="stylesheet" href="css/style.css">
826
-
<header class="site-header">
827
-
<div class="header-content">
828
-
<h1 class="site-title">{{ title }}</h1>
829
-
<nav class="site-nav">
830
-
<a href="timeline.html" class="nav-link {% if page == 'timeline' %}active{% endif %}">Timeline</a>
831
-
<a href="links.html" class="nav-link {% if page == 'links' %}active{% endif %}">Links</a>
832
-
<a href="users.html" class="nav-link {% if page == 'users' %}active{% endif %}">Users</a>
837
-
<main class="main-content">
838
-
{% block content %}{% endblock %}
841
-
<footer class="site-footer">
842
-
<p>Generated on {{ generated_at }} by <a href="https://github.com/avsm/thicket">Thicket</a></p>
845
-
<script src="js/script.js"></script>
850
-
<file path="src/thicket/templates/index.html">
854
-
<meta charset="UTF-8">
855
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
856
-
<title>{{ title }}</title>
857
-
<meta http-equiv="refresh" content="0; url=timeline.html">
858
-
<link rel="canonical" href="timeline.html">
861
-
<p>Redirecting to <a href="timeline.html">Timeline</a>...</p>
866
-
<file path="src/thicket/templates/links.html">
867
-
{% extends "base.html" %}
869
-
{% block page_title %}Outgoing Links - {{ title }}{% endblock %}
871
-
{% block content %}
872
-
<div class="page-content">
873
-
<h2>Outgoing Links</h2>
874
-
<p class="page-description">External links referenced in blog posts, ordered by most recent reference.</p>
876
-
{% for link in outgoing_links %}
877
-
<article class="link-group">
878
-
<h3 class="link-url">
879
-
<a href="{{ link.url }}" target="_blank">{{ link.url|truncate(80) }}</a>
880
-
{% if link.target_username %}
881
-
<span class="target-user">({{ link.target_username }})</span>
884
-
<div class="referencing-entries">
885
-
<span class="ref-count">Referenced in {{ link.entries|length }} post(s):</span>
887
-
{% for display_name, entry in link.entries[:5] %}
889
-
<span class="author">{{ display_name }}</span> -
890
-
<a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a>
891
-
<time datetime="{{ entry.updated or entry.published }}">
892
-
({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }})
896
-
{% if link.entries|length > 5 %}
897
-
<li class="more">... and {{ link.entries|length - 5 }} more</li>
907
-
<file path="src/thicket/templates/script.js">
908
-
// Enhanced functionality for thicket website
909
-
document.addEventListener('DOMContentLoaded', function() {
911
-
// Enhance thread collapsing (optional feature)
912
-
const threadHeaders = document.querySelectorAll('.thread-header');
913
-
threadHeaders.forEach(header => {
914
-
header.style.cursor = 'pointer';
915
-
header.addEventListener('click', function() {
916
-
const thread = this.parentElement;
917
-
const entries = thread.querySelectorAll('.thread-entry');
919
-
// Toggle visibility of all but the first entry
920
-
for (let i = 1; i < entries.length; i++) {
921
-
entries[i].style.display = entries[i].style.display === 'none' ? 'block' : 'none';
924
-
// Update thread count text
925
-
const count = this.querySelector('.thread-count');
926
-
if (entries[1] && entries[1].style.display === 'none') {
927
-
count.textContent = count.textContent.replace('posts', 'posts (collapsed)');
929
-
count.textContent = count.textContent.replace(' (collapsed)', '');
934
-
// Add relative time display
935
-
const timeElements = document.querySelectorAll('time');
936
-
timeElements.forEach(timeEl => {
937
-
const datetime = new Date(timeEl.getAttribute('datetime'));
938
-
const now = new Date();
939
-
const diffMs = now - datetime;
940
-
const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
943
-
if (diffDays === 0) {
944
-
const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
945
-
if (diffHours === 0) {
946
-
const diffMinutes = Math.floor(diffMs / (1000 * 60));
947
-
relativeTime = diffMinutes === 0 ? 'just now' : `${diffMinutes}m ago`;
949
-
relativeTime = `${diffHours}h ago`;
951
-
} else if (diffDays === 1) {
952
-
relativeTime = 'yesterday';
953
-
} else if (diffDays < 7) {
954
-
relativeTime = `${diffDays}d ago`;
955
-
} else if (diffDays < 30) {
956
-
const weeks = Math.floor(diffDays / 7);
957
-
relativeTime = weeks === 1 ? '1w ago' : `${weeks}w ago`;
958
-
} else if (diffDays < 365) {
959
-
const months = Math.floor(diffDays / 30);
960
-
relativeTime = months === 1 ? '1mo ago' : `${months}mo ago`;
962
-
const years = Math.floor(diffDays / 365);
963
-
relativeTime = years === 1 ? '1y ago' : `${years}y ago`;
966
-
// Add relative time as title attribute
967
-
timeEl.setAttribute('title', timeEl.textContent);
968
-
timeEl.textContent = relativeTime;
971
-
// Enhanced anchor link scrolling for shared references
972
-
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
973
-
anchor.addEventListener('click', function (e) {
974
-
e.preventDefault();
975
-
const target = document.querySelector(this.getAttribute('href'));
977
-
target.scrollIntoView({
978
-
behavior: 'smooth',
982
-
// Highlight the target briefly
983
-
const timelineEntry = target.closest('.timeline-entry');
984
-
if (timelineEntry) {
985
-
timelineEntry.style.outline = '2px solid var(--primary-color)';
986
-
timelineEntry.style.borderRadius = '8px';
988
-
timelineEntry.style.outline = '';
989
-
timelineEntry.style.borderRadius = '';
998
-
<file path="src/thicket/templates/style.css">
999
-
/* Modern, clean design with high-density text and readable theme */
1002
-
--primary-color: #2c3e50;
1003
-
--secondary-color: #3498db;
1004
-
--accent-color: #e74c3c;
1005
-
--background: #ffffff;
1006
-
--surface: #f8f9fa;
1007
-
--text-primary: #2c3e50;
1008
-
--text-secondary: #7f8c8d;
1009
-
--border-color: #e0e0e0;
1010
-
--thread-indent: 20px;
1011
-
--max-width: 1200px;
1017
-
box-sizing: border-box;
1021
-
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif;
1024
-
color: var(--text-primary);
1025
-
background-color: var(--background);
1030
-
background-color: var(--surface);
1031
-
border-bottom: 1px solid var(--border-color);
1032
-
padding: 0.75rem 0;
1039
-
max-width: var(--max-width);
1043
-
justify-content: space-between;
1044
-
align-items: center;
1048
-
font-size: 1.5rem;
1050
-
color: var(--primary-color);
1061
-
text-decoration: none;
1062
-
color: var(--text-secondary);
1064
-
font-size: 0.95rem;
1065
-
padding: 0.5rem 0.75rem;
1066
-
border-radius: 4px;
1067
-
transition: all 0.2s ease;
1071
-
color: var(--primary-color);
1072
-
background-color: var(--background);
1075
-
.nav-link.active {
1076
-
color: var(--secondary-color);
1077
-
background-color: var(--background);
1081
-
/* Main Content */
1083
-
max-width: var(--max-width);
1084
-
margin: 2rem auto;
1092
-
.page-description {
1093
-
color: var(--text-secondary);
1094
-
margin-bottom: 1.5rem;
1095
-
font-style: italic;
1100
-
margin-bottom: 2rem;
1104
-
font-size: 1.3rem;
1106
-
margin-bottom: 0.75rem;
1107
-
color: var(--primary-color);
1111
-
font-size: 1.1rem;
1113
-
margin-bottom: 0.75rem;
1114
-
color: var(--primary-color);
1117
-
/* Entries and Threads */
1119
-
margin-bottom: 1.5rem;
1121
-
background-color: var(--surface);
1122
-
border-radius: 4px;
1123
-
border: 1px solid var(--border-color);
1126
-
/* Timeline-style entries */
1128
-
margin-bottom: 0.5rem;
1129
-
padding: 0.5rem 0.75rem;
1131
-
background: transparent;
1132
-
transition: background-color 0.2s ease;
1135
-
.timeline-entry:hover {
1136
-
background-color: var(--surface);
1140
-
display: inline-flex;
1142
-
align-items: center;
1143
-
font-size: 0.75rem;
1144
-
color: var(--text-secondary);
1145
-
margin-bottom: 0.25rem;
1149
-
font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
1150
-
font-size: 0.75rem;
1151
-
color: var(--text-secondary);
1154
-
.timeline-author {
1156
-
color: var(--primary-color);
1157
-
font-size: 0.8rem;
1158
-
text-decoration: none;
1161
-
.timeline-author:hover {
1162
-
color: var(--secondary-color);
1163
-
text-decoration: underline;
1166
-
.timeline-content {
1171
-
font-size: 0.95rem;
1175
-
.timeline-title a {
1176
-
color: var(--primary-color);
1177
-
text-decoration: none;
1180
-
.timeline-title a:hover {
1181
-
color: var(--secondary-color);
1182
-
text-decoration: underline;
1185
-
.timeline-summary {
1186
-
color: var(--text-secondary);
1187
-
font-size: 0.9rem;
1191
-
/* Legacy styles for other sections */
1192
-
.entry-meta, .thread-header {
1195
-
align-items: center;
1196
-
margin-bottom: 0.5rem;
1197
-
font-size: 0.85rem;
1198
-
color: var(--text-secondary);
1203
-
color: var(--primary-color);
1207
-
font-size: 0.85rem;
1211
-
font-size: 1.1rem;
1213
-
margin-bottom: 0.5rem;
1217
-
color: var(--primary-color);
1218
-
text-decoration: none;
1222
-
color: var(--secondary-color);
1223
-
text-decoration: underline;
1227
-
color: var(--text-primary);
1229
-
margin-top: 0.5rem;
1232
-
/* Enhanced Threading Styles */
1234
-
/* Conversation Clusters */
1235
-
.conversation-cluster {
1236
-
background-color: var(--background);
1237
-
border: 2px solid var(--border-color);
1238
-
border-radius: 8px;
1239
-
margin-bottom: 2rem;
1241
-
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
1244
-
.conversation-header {
1245
-
background: linear-gradient(135deg, var(--surface) 0%, #f1f3f4 100%);
1246
-
padding: 0.75rem 1rem;
1247
-
border-bottom: 1px solid var(--border-color);
1250
-
.conversation-meta {
1252
-
justify-content: space-between;
1253
-
align-items: center;
1258
-
.conversation-count {
1260
-
color: var(--secondary-color);
1261
-
font-size: 0.9rem;
1264
-
.conversation-participants {
1265
-
font-size: 0.8rem;
1266
-
color: var(--text-secondary);
1268
-
text-align: right;
1271
-
.conversation-flow {
1275
-
/* Threaded Conversation Entries */
1276
-
.conversation-entry {
1277
-
position: relative;
1278
-
margin-bottom: 0.75rem;
1280
-
align-items: flex-start;
1283
-
.conversation-entry.level-0 {
1287
-
.conversation-entry.level-1 {
1288
-
margin-left: 1.5rem;
1291
-
.conversation-entry.level-2 {
1292
-
margin-left: 3rem;
1295
-
.conversation-entry.level-3 {
1296
-
margin-left: 4.5rem;
1299
-
.conversation-entry.level-4 {
1300
-
margin-left: 6rem;
1303
-
.entry-connector {
1305
-
background-color: var(--secondary-color);
1306
-
margin-right: 0.75rem;
1307
-
margin-top: 0.25rem;
1309
-
border-radius: 2px;
1313
-
.conversation-entry.level-0 .entry-connector {
1314
-
background-color: var(--accent-color);
1320
-
background-color: var(--surface);
1322
-
border-radius: 6px;
1323
-
border: 1px solid var(--border-color);
1324
-
transition: all 0.2s ease;
1327
-
.entry-content:hover {
1328
-
border-color: var(--secondary-color);
1329
-
box-shadow: 0 2px 8px rgba(52, 152, 219, 0.1);
1332
-
/* Reference Indicators */
1333
-
.reference-indicators {
1334
-
display: inline-flex;
1336
-
margin-left: 0.5rem;
1339
-
.ref-out, .ref-in {
1340
-
display: inline-block;
1343
-
border-radius: 50%;
1344
-
text-align: center;
1345
-
line-height: 1rem;
1346
-
font-size: 0.7rem;
1347
-
font-weight: bold;
1351
-
background-color: #e8f5e8;
1356
-
background-color: #e8f0ff;
1360
-
/* Reference Badges for Individual Posts */
1361
-
.timeline-entry.with-references {
1362
-
background-color: var(--surface);
1365
-
/* Conversation posts in unified timeline */
1366
-
.timeline-entry.conversation-post {
1367
-
background: transparent;
1369
-
margin-bottom: 0.5rem;
1370
-
padding: 0.5rem 0.75rem;
1373
-
.timeline-entry.conversation-post.level-0 {
1375
-
border-left: 2px solid var(--accent-color);
1376
-
padding-left: 0.75rem;
1379
-
.timeline-entry.conversation-post.level-1 {
1380
-
margin-left: 1.5rem;
1381
-
border-left: 2px solid var(--secondary-color);
1382
-
padding-left: 0.75rem;
1385
-
.timeline-entry.conversation-post.level-2 {
1386
-
margin-left: 3rem;
1387
-
border-left: 2px solid var(--text-secondary);
1388
-
padding-left: 0.75rem;
1391
-
.timeline-entry.conversation-post.level-3 {
1392
-
margin-left: 4.5rem;
1393
-
border-left: 2px solid var(--text-secondary);
1394
-
padding-left: 0.75rem;
1397
-
.timeline-entry.conversation-post.level-4 {
1398
-
margin-left: 6rem;
1399
-
border-left: 2px solid var(--text-secondary);
1400
-
padding-left: 0.75rem;
1403
-
/* Cross-thread linking */
1404
-
.cross-thread-links {
1405
-
margin-top: 0.5rem;
1406
-
padding-top: 0.5rem;
1407
-
border-top: 1px solid var(--border-color);
1410
-
.cross-thread-indicator {
1411
-
font-size: 0.75rem;
1412
-
color: var(--text-secondary);
1413
-
background-color: var(--surface);
1414
-
padding: 0.25rem 0.5rem;
1415
-
border-radius: 12px;
1416
-
border: 1px solid var(--border-color);
1417
-
display: inline-block;
1420
-
/* Inline shared references styling */
1421
-
.inline-shared-refs {
1422
-
margin-left: 0.5rem;
1423
-
font-size: 0.85rem;
1424
-
color: var(--text-secondary);
1427
-
.shared-ref-link {
1428
-
color: var(--primary-color);
1429
-
text-decoration: none;
1431
-
transition: color 0.2s ease;
1434
-
.shared-ref-link:hover {
1435
-
color: var(--secondary-color);
1436
-
text-decoration: underline;
1439
-
.shared-ref-more {
1440
-
font-style: italic;
1441
-
color: var(--text-secondary);
1442
-
font-size: 0.8rem;
1443
-
margin-left: 0.25rem;
1446
-
.user-anchor, .post-anchor {
1447
-
position: absolute;
1448
-
margin-top: -60px; /* Offset for fixed header */
1449
-
pointer-events: none;
1452
-
.cross-thread-link {
1453
-
color: var(--primary-color);
1454
-
text-decoration: none;
1456
-
transition: color 0.2s ease;
1459
-
.cross-thread-link:hover {
1460
-
color: var(--secondary-color);
1461
-
text-decoration: underline;
1464
-
.reference-badges {
1467
-
margin-left: 0.5rem;
1472
-
display: inline-block;
1473
-
padding: 0.1rem 0.4rem;
1474
-
border-radius: 12px;
1475
-
font-size: 0.7rem;
1477
-
text-transform: uppercase;
1478
-
letter-spacing: 0.05em;
1481
-
.ref-badge.ref-outbound {
1482
-
background-color: #e8f5e8;
1484
-
border: 1px solid #c3e6c3;
1487
-
.ref-badge.ref-inbound {
1488
-
background-color: #e8f0ff;
1490
-
border: 1px solid #b3d9ff;
1493
-
/* Author Color Coding */
1494
-
.timeline-author {
1495
-
position: relative;
1498
-
.timeline-author::before {
1500
-
display: inline-block;
1503
-
border-radius: 50%;
1504
-
margin-right: 0.5rem;
1505
-
background-color: var(--secondary-color);
1508
-
/* Generate consistent colors for authors */
1509
-
.author-avsm::before { background-color: #e74c3c; }
1510
-
.author-mort::before { background-color: #3498db; }
1511
-
.author-mte::before { background-color: #2ecc71; }
1512
-
.author-ryan::before { background-color: #f39c12; }
1513
-
.author-mwd::before { background-color: #9b59b6; }
1514
-
.author-dra::before { background-color: #1abc9c; }
1515
-
.author-pf341::before { background-color: #34495e; }
1516
-
.author-sadiqj::before { background-color: #e67e22; }
1517
-
.author-martinkl::before { background-color: #8e44ad; }
1518
-
.author-jonsterling::before { background-color: #27ae60; }
1519
-
.author-jon::before { background-color: #f1c40f; }
1520
-
.author-onkar::before { background-color: #e91e63; }
1521
-
.author-gabriel::before { background-color: #00bcd4; }
1522
-
.author-jess::before { background-color: #ff5722; }
1523
-
.author-ibrahim::before { background-color: #607d8b; }
1524
-
.author-andres::before { background-color: #795548; }
1525
-
.author-eeg::before { background-color: #ff9800; }
1527
-
/* Section Headers */
1528
-
.conversations-section h3,
1529
-
.referenced-posts-section h3,
1530
-
.individual-posts-section h3 {
1531
-
border-bottom: 2px solid var(--border-color);
1532
-
padding-bottom: 0.5rem;
1533
-
margin-bottom: 1.5rem;
1534
-
position: relative;
1537
-
.conversations-section h3::before {
1539
-
margin-right: 0.5rem;
1542
-
.referenced-posts-section h3::before {
1544
-
margin-right: 0.5rem;
1547
-
.individual-posts-section h3::before {
1549
-
margin-right: 0.5rem;
1552
-
/* Legacy thread styles (for backward compatibility) */
1554
-
background-color: var(--background);
1555
-
border: 1px solid var(--border-color);
1558
-
margin-bottom: 1rem;
1562
-
background-color: var(--surface);
1563
-
padding: 0.5rem 0.75rem;
1564
-
border-bottom: 1px solid var(--border-color);
1569
-
color: var(--secondary-color);
1573
-
padding: 0.5rem 0.75rem;
1574
-
border-bottom: 1px solid var(--border-color);
1577
-
.thread-entry:last-child {
1578
-
border-bottom: none;
1581
-
.thread-entry.reply {
1582
-
margin-left: var(--thread-indent);
1583
-
border-left: 3px solid var(--secondary-color);
1584
-
background-color: var(--surface);
1587
-
/* Links Section */
1589
-
background-color: var(--background);
1594
-
word-break: break-word;
1598
-
color: var(--secondary-color);
1599
-
text-decoration: none;
1602
-
.link-url a:hover {
1603
-
text-decoration: underline;
1607
-
font-size: 0.9rem;
1608
-
color: var(--text-secondary);
1609
-
font-weight: normal;
1612
-
.referencing-entries {
1613
-
margin-top: 0.75rem;
1618
-
color: var(--text-secondary);
1619
-
font-size: 0.9rem;
1622
-
.referencing-entries ul {
1624
-
margin-top: 0.5rem;
1625
-
padding-left: 1rem;
1628
-
.referencing-entries li {
1629
-
margin-bottom: 0.25rem;
1630
-
font-size: 0.9rem;
1633
-
.referencing-entries .more {
1634
-
font-style: italic;
1635
-
color: var(--text-secondary);
1638
-
/* Users Section */
1640
-
background-color: var(--background);
1646
-
align-items: start;
1647
-
margin-bottom: 1rem;
1653
-
border-radius: 50%;
1654
-
object-fit: cover;
1658
-
margin-bottom: 0.25rem;
1662
-
font-size: 0.9rem;
1663
-
color: var(--text-secondary);
1664
-
font-weight: normal;
1668
-
font-size: 0.9rem;
1669
-
color: var(--text-secondary);
1673
-
color: var(--secondary-color);
1674
-
text-decoration: none;
1677
-
.user-meta a:hover {
1678
-
text-decoration: underline;
1690
-
font-size: 0.95rem;
1691
-
margin-bottom: 0.5rem;
1692
-
color: var(--text-secondary);
1701
-
margin-bottom: 0.25rem;
1702
-
font-size: 0.9rem;
1707
-
max-width: var(--max-width);
1708
-
margin: 3rem auto 2rem;
1709
-
padding: 1rem 2rem;
1710
-
text-align: center;
1711
-
color: var(--text-secondary);
1712
-
font-size: 0.85rem;
1713
-
border-top: 1px solid var(--border-color);
1717
-
color: var(--secondary-color);
1718
-
text-decoration: none;
1721
-
.site-footer a:hover {
1722
-
text-decoration: underline;
1726
-
@media (max-width: 768px) {
1728
-
font-size: 1.3rem;
1732
-
flex-direction: column;
1734
-
align-items: flex-start;
1745
-
.thread-entry.reply {
1746
-
margin-left: calc(var(--thread-indent) / 2);
1750
-
flex-direction: column;
1755
-
<file path="src/thicket/templates/timeline.html">
1756
-
{% extends "base.html" %}
1758
-
{% block page_title %}Timeline - {{ title }}{% endblock %}
1760
-
{% block content %}
1761
-
{% set seen_users = [] %}
1762
-
<div class="page-content">
1763
-
<h2>Recent Posts & Conversations</h2>
1765
-
<section class="unified-timeline">
1766
-
{% for item in timeline_items %}
1767
-
{% if item.type == "post" %}
1768
-
<!-- Individual Post -->
1769
-
<article class="timeline-entry {% if item.content.references %}with-references{% endif %}">
1770
-
<div class="timeline-meta">
1771
-
<time datetime="{{ item.content.entry.updated or item.content.entry.published }}" class="timeline-time">
1772
-
{{ (item.content.entry.updated or item.content.entry.published).strftime('%Y-%m-%d %H:%M') }}
1774
-
{% set homepage = get_user_homepage(item.content.username) %}
1775
-
{% if item.content.username not in seen_users %}
1776
-
<a id="{{ item.content.username }}" class="user-anchor"></a>
1777
-
{% set _ = seen_users.append(item.content.username) %}
1779
-
<a id="post-{{ loop.index0 }}-{{ safe_anchor_id(item.content.entry.id) }}" class="post-anchor"></a>
1781
-
<a href="{{ homepage }}" target="_blank" class="timeline-author">{{ item.content.display_name }}</a>
1783
-
<span class="timeline-author">{{ item.content.display_name }}</span>
1785
-
{% if item.content.references %}
1786
-
<div class="reference-badges">
1787
-
{% for ref in item.content.references %}
1788
-
{% if ref.type == 'outbound' %}
1789
-
<span class="ref-badge ref-outbound" title="References {{ ref.target_username or 'external post' }}">
1790
-
โ {{ ref.target_username or 'ext' }}
1792
-
{% elif ref.type == 'inbound' %}
1793
-
<span class="ref-badge ref-inbound" title="Referenced by {{ ref.source_username or 'external post' }}">
1794
-
โ {{ ref.source_username or 'ext' }}
1801
-
<div class="timeline-content">
1802
-
<strong class="timeline-title">
1803
-
<a href="{{ item.content.entry.link }}" target="_blank">{{ item.content.entry.title }}</a>
1805
-
{% if item.content.entry.summary %}
1806
-
<span class="timeline-summary">โ {{ clean_html_summary(item.content.entry.summary, 250) }}</span>
1808
-
{% if item.content.shared_references %}
1809
-
<span class="inline-shared-refs">
1810
-
{% for ref in item.content.shared_references[:3] %}
1811
-
{% if ref.target_username %}
1812
-
<a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %}
1815
-
{% if item.content.shared_references|length > 3 %}
1816
-
<span class="shared-ref-more">+{{ item.content.shared_references|length - 3 }} more</span>
1820
-
{% if item.content.cross_thread_links %}
1821
-
<div class="cross-thread-links">
1822
-
<span class="cross-thread-indicator">๐ Also appears: </span>
1823
-
{% for link in item.content.cross_thread_links %}
1824
-
<a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %}
1831
-
{% elif item.type == "thread" %}
1832
-
<!-- Conversation Thread -->
1833
-
{% set outer_loop_index = loop.index0 %}
1834
-
{% for thread_item in item.content %}
1835
-
<article class="timeline-entry conversation-post level-{{ thread_item.thread_level }}">
1836
-
<div class="timeline-meta">
1837
-
<time datetime="{{ thread_item.entry.updated or thread_item.entry.published }}" class="timeline-time">
1838
-
{{ (thread_item.entry.updated or thread_item.entry.published).strftime('%Y-%m-%d %H:%M') }}
1840
-
{% set homepage = get_user_homepage(thread_item.username) %}
1841
-
{% if thread_item.username not in seen_users %}
1842
-
<a id="{{ thread_item.username }}" class="user-anchor"></a>
1843
-
{% set _ = seen_users.append(thread_item.username) %}
1845
-
<a id="post-{{ outer_loop_index }}-{{ loop.index0 }}-{{ safe_anchor_id(thread_item.entry.id) }}" class="post-anchor"></a>
1847
-
<a href="{{ homepage }}" target="_blank" class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</a>
1849
-
<span class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</span>
1851
-
{% if thread_item.references_to or thread_item.referenced_by %}
1852
-
<span class="reference-indicators">
1853
-
{% if thread_item.references_to %}
1854
-
<span class="ref-out" title="References other posts">โ</span>
1856
-
{% if thread_item.referenced_by %}
1857
-
<span class="ref-in" title="Referenced by other posts">โ</span>
1862
-
<div class="timeline-content">
1863
-
<strong class="timeline-title">
1864
-
<a href="{{ thread_item.entry.link }}" target="_blank">{{ thread_item.entry.title }}</a>
1866
-
{% if thread_item.entry.summary %}
1867
-
<span class="timeline-summary">โ {{ clean_html_summary(thread_item.entry.summary, 300) }}</span>
1869
-
{% if thread_item.shared_references %}
1870
-
<span class="inline-shared-refs">
1871
-
{% for ref in thread_item.shared_references[:3] %}
1872
-
{% if ref.target_username %}
1873
-
<a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %}
1876
-
{% if thread_item.shared_references|length > 3 %}
1877
-
<span class="shared-ref-more">+{{ thread_item.shared_references|length - 3 }} more</span>
1881
-
{% if thread_item.cross_thread_links %}
1882
-
<div class="cross-thread-links">
1883
-
<span class="cross-thread-indicator">๐ Also appears: </span>
1884
-
{% for link in thread_item.cross_thread_links %}
1885
-
<a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %}
1899
-
<file path="src/thicket/templates/users.html">
1900
-
{% extends "base.html" %}
1902
-
{% block page_title %}Users - {{ title }}{% endblock %}
1904
-
{% block content %}
1905
-
<div class="page-content">
1907
-
<p class="page-description">All users contributing to this thicket, ordered by post count.</p>
1909
-
{% for user_info in users %}
1910
-
<article class="user-card">
1911
-
<div class="user-header">
1912
-
{% if user_info.metadata.icon and user_info.metadata.icon != "None" %}
1913
-
<img src="{{ user_info.metadata.icon }}" alt="{{ user_info.metadata.username }}" class="user-icon">
1915
-
<div class="user-info">
1917
-
{% if user_info.metadata.display_name %}
1918
-
{{ user_info.metadata.display_name }}
1919
-
<span class="username">({{ user_info.metadata.username }})</span>
1921
-
{{ user_info.metadata.username }}
1924
-
<div class="user-meta">
1925
-
{% if user_info.metadata.homepage %}
1926
-
<a href="{{ user_info.metadata.homepage }}" target="_blank">{{ user_info.metadata.homepage }}</a>
1928
-
{% if user_info.metadata.email %}
1929
-
<span class="separator">โข</span>
1930
-
<a href="mailto:{{ user_info.metadata.email }}">{{ user_info.metadata.email }}</a>
1932
-
<span class="separator">โข</span>
1933
-
<span class="post-count">{{ user_info.metadata.entry_count }} posts</span>
1938
-
{% if user_info.recent_entries %}
1939
-
<div class="user-recent">
1940
-
<h4>Recent posts:</h4>
1942
-
{% for display_name, entry in user_info.recent_entries %}
1944
-
<a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a>
1945
-
<time datetime="{{ entry.updated or entry.published }}">
1946
-
({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }})
1959
-
<file path="README.md">
1962
-
A modern CLI tool for persisting Atom/RSS feeds in Git repositories, designed to enable distributed webblog comment structures.
1966
-
- **Feed Auto-Discovery**: Automatically extracts user metadata from Atom/RSS feeds
1967
-
- **Git Storage**: Stores feed entries in a Git repository with full history
1968
-
- **Duplicate Management**: Manual curation of duplicate entries across feeds
1969
-
- **Modern CLI**: Built with Typer and Rich for beautiful terminal output
1970
-
- **Comprehensive Parsing**: Supports RSS 0.9x, RSS 1.0, RSS 2.0, and Atom feeds
1971
-
- **Cron-Friendly**: Designed for scheduled execution
1976
-
# Install from source
1979
-
# Or install with dev dependencies
1980
-
pip install -e .[dev]
1985
-
1. **Initialize a new thicket repository:**
1987
-
thicket init ./my-feeds
1990
-
2. **Add a user with their feed:**
1992
-
thicket add user "alice" --feed "https://alice.example.com/feed.xml"
1995
-
3. **Sync feeds to download entries:**
1997
-
thicket sync --all
2000
-
4. **List users and feeds:**
2002
-
thicket list users
2003
-
thicket list feeds
2004
-
thicket list entries
2011
-
thicket init <git-store-path> [--cache-dir <path>] [--config <config-file>]
2014
-
### Add Users and Feeds
2016
-
# Add user with auto-discovery
2017
-
thicket add user "username" --feed "https://example.com/feed.xml"
2019
-
# Add user with manual metadata
2020
-
thicket add user "username" \
2021
-
--feed "https://example.com/feed.xml" \
2022
-
--email "user@example.com" \
2023
-
--homepage "https://example.com" \
2024
-
--display-name "User Name"
2026
-
# Add additional feed to existing user
2027
-
thicket add feed "username" "https://example.com/other-feed.xml"
2033
-
thicket sync --all
2035
-
# Sync specific user
2036
-
thicket sync --user "username"
2038
-
# Dry run (preview changes)
2039
-
thicket sync --all --dry-run
2042
-
### List Information
2045
-
thicket list users
2048
-
thicket list feeds
2050
-
# List feeds for specific user
2051
-
thicket list feeds --user "username"
2053
-
# List recent entries
2054
-
thicket list entries --limit 20
2056
-
# List entries for specific user
2057
-
thicket list entries --user "username"
2060
-
### Manage Duplicates
2062
-
# List duplicate mappings
2063
-
thicket duplicates list
2065
-
# Mark entries as duplicates
2066
-
thicket duplicates add "https://example.com/dup" "https://example.com/canonical"
2068
-
# Remove duplicate mapping
2069
-
thicket duplicates remove "https://example.com/dup"
2074
-
Thicket uses a YAML configuration file (default: `thicket.yaml`):
2077
-
git_store: ./feeds-repo
2078
-
cache_dir: ~/.cache/thicket
2082
-
- https://alice.example.com/feed.xml
2083
-
email: alice@example.com
2084
-
homepage: https://alice.example.com
2085
-
display_name: Alice
2088
-
## Git Repository Structure
2092
-
โโโ index.json # User directory index
2093
-
โโโ duplicates.json # Duplicate entry mappings
2095
-
โ โโโ metadata.json # User metadata
2096
-
โ โโโ entry_id_1.json # Feed entries
2097
-
โ โโโ entry_id_2.json
2106
-
# Install in development mode
2107
-
pip install -e .[dev]
2114
-
black --check src/
2116
-
# Run type checking
2122
-
- **CLI**: Modern interface with Typer and Rich
2123
-
- **Feed Processing**: Universal parsing with feedparser
2124
-
- **Git Storage**: Structured storage with GitPython
2125
-
- **Data Models**: Pydantic for validation and serialization
2126
-
- **Async HTTP**: httpx for efficient feed fetching
2130
-
- **Blog Aggregation**: Collect and archive blog posts from multiple sources
2131
-
- **Comment Networks**: Enable distributed commenting systems
2132
-
- **Feed Archival**: Preserve feed history beyond typical feed depth limits
2133
-
- **Content Curation**: Manage and deduplicate content across feeds
2137
-
MIT License - see LICENSE file for details.
2140
-
<file path="src/thicket/cli/commands/index_cmd.py">
2141
-
"""CLI command for building reference index from blog entries."""
2144
-
from pathlib import Path
2145
-
from typing import Optional
2148
-
from rich.console import Console
2149
-
from rich.progress import (
2153
-
TaskProgressColumn,
2156
-
from rich.table import Table
2158
-
from ...core.git_store import GitStore
2159
-
from ...core.reference_parser import ReferenceIndex, ReferenceParser
2160
-
from ..main import app
2161
-
from ..utils import get_tsv_mode, load_config
2163
-
console = Console()
2168
-
config_file: Optional[Path] = typer.Option(
2172
-
help="Path to configuration file",
2174
-
output_file: Optional[Path] = typer.Option(
2178
-
help="Path to output index file (default: updates links.json in git store)",
2180
-
verbose: bool = typer.Option(
2184
-
help="Show detailed progress information",
2187
-
"""Build a reference index showing which blog entries reference others.
2189
-
This command analyzes all blog entries to detect cross-references between
2190
-
different blogs, creating an index that can be used to build threaded
2191
-
views of related content.
2193
-
Updates the unified links.json file with reference data.
2196
-
# Load configuration
2197
-
config = load_config(config_file)
2199
-
# Initialize Git store
2200
-
git_store = GitStore(config.git_store)
2202
-
# Initialize reference parser
2203
-
parser = ReferenceParser()
2205
-
# Build user domain mapping
2207
-
console.print("Building user domain mapping...")
2208
-
user_domains = parser.build_user_domain_mapping(git_store)
2211
-
console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")
2213
-
# Initialize reference index
2214
-
ref_index = ReferenceIndex()
2215
-
ref_index.user_domains = user_domains
2218
-
index = git_store._load_index()
2219
-
users = list(index.users.keys())
2222
-
console.print("[yellow]No users found in Git store[/yellow]")
2223
-
raise typer.Exit(0)
2225
-
# Process all entries
2227
-
total_references = 0
2228
-
all_references = []
2232
-
TextColumn("[progress.description]{task.description}"),
2234
-
TaskProgressColumn(),
2238
-
# Count total entries first
2239
-
counting_task = progress.add_task("Counting entries...", total=len(users))
2241
-
for username in users:
2242
-
entries = git_store.list_entries(username)
2243
-
entry_counts[username] = len(entries)
2244
-
total_entries += len(entries)
2245
-
progress.advance(counting_task)
2247
-
progress.remove_task(counting_task)
2249
-
# Process entries - extract references
2250
-
processing_task = progress.add_task(
2251
-
f"Extracting references from {total_entries} entries...",
2252
-
total=total_entries
2255
-
for username in users:
2256
-
entries = git_store.list_entries(username)
2258
-
for entry in entries:
2259
-
# Extract references from this entry
2260
-
references = parser.extract_references(entry, username, user_domains)
2261
-
all_references.extend(references)
2263
-
progress.advance(processing_task)
2265
-
if verbose and references:
2266
-
console.print(f" Found {len(references)} references in {username}:{entry.title[:50]}...")
2268
-
progress.remove_task(processing_task)
2270
-
# Resolve target_entry_ids for references
2271
-
if all_references:
2272
-
resolve_task = progress.add_task(
2273
-
f"Resolving {len(all_references)} references...",
2274
-
total=len(all_references)
2278
-
console.print(f"Resolving target entry IDs for {len(all_references)} references...")
2280
-
resolved_references = parser.resolve_target_entry_ids(all_references, git_store)
2282
-
# Count resolved references
2283
-
resolved_count = sum(1 for ref in resolved_references if ref.target_entry_id is not None)
2285
-
console.print(f"Resolved {resolved_count} out of {len(all_references)} references")
2287
-
# Add resolved references to index
2288
-
for ref in resolved_references:
2289
-
ref_index.add_reference(ref)
2290
-
total_references += 1
2291
-
progress.advance(resolve_task)
2293
-
progress.remove_task(resolve_task)
2295
-
# Determine output path
2297
-
output_path = output_file
2299
-
output_path = config.git_store / "links.json"
2301
-
# Load existing links data or create new structure
2302
-
if output_path.exists() and not output_file:
2303
-
# Load existing unified structure
2304
-
with open(output_path) as f:
2305
-
existing_data = json.load(f)
2307
-
# Create new structure
2310
-
"reverse_mapping": {},
2311
-
"user_domains": {}
2314
-
# Update with reference data
2315
-
existing_data["references"] = ref_index.to_dict()["references"]
2316
-
existing_data["user_domains"] = {k: list(v) for k, v in user_domains.items()}
2318
-
# Save updated structure
2319
-
with open(output_path, "w") as f:
2320
-
json.dump(existing_data, f, indent=2, default=str)
2323
-
if not get_tsv_mode():
2324
-
console.print("\n[green]โ Reference index built successfully[/green]")
2326
-
# Create summary table or TSV output
2327
-
if get_tsv_mode():
2328
-
print("Metric\tCount")
2329
-
print(f"Total Users\t{len(users)}")
2330
-
print(f"Total Entries\t{total_entries}")
2331
-
print(f"Total References\t{total_references}")
2332
-
print(f"Outbound Refs\t{len(ref_index.outbound_refs)}")
2333
-
print(f"Inbound Refs\t{len(ref_index.inbound_refs)}")
2334
-
print(f"Output File\t{output_path}")
2336
-
table = Table(title="Reference Index Summary")
2337
-
table.add_column("Metric", style="cyan")
2338
-
table.add_column("Count", style="green")
2340
-
table.add_row("Total Users", str(len(users)))
2341
-
table.add_row("Total Entries", str(total_entries))
2342
-
table.add_row("Total References", str(total_references))
2343
-
table.add_row("Outbound Refs", str(len(ref_index.outbound_refs)))
2344
-
table.add_row("Inbound Refs", str(len(ref_index.inbound_refs)))
2345
-
table.add_row("Output File", str(output_path))
2347
-
console.print(table)
2349
-
# Show some interesting statistics
2350
-
if total_references > 0:
2351
-
if not get_tsv_mode():
2352
-
console.print("\n[bold]Reference Statistics:[/bold]")
2354
-
# Most referenced users
2355
-
target_counts = {}
2356
-
unresolved_domains = set()
2358
-
for ref in ref_index.references:
2359
-
if ref.target_username:
2360
-
target_counts[ref.target_username] = target_counts.get(ref.target_username, 0) + 1
2362
-
# Track unresolved domains
2363
-
from urllib.parse import urlparse
2364
-
domain = urlparse(ref.target_url).netloc.lower()
2365
-
unresolved_domains.add(domain)
2368
-
if get_tsv_mode():
2369
-
print("Referenced User\tReference Count")
2370
-
for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
2371
-
print(f"{username}\t{count}")
2373
-
console.print("\nMost referenced users:")
2374
-
for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
2375
-
console.print(f" {username}: {count} references")
2377
-
if unresolved_domains and verbose:
2378
-
if get_tsv_mode():
2379
-
print("Unresolved Domain\tCount")
2380
-
for domain in sorted(list(unresolved_domains)[:10]):
2381
-
print(f"{domain}\t1")
2382
-
if len(unresolved_domains) > 10:
2383
-
print(f"... and {len(unresolved_domains) - 10} more\t...")
2385
-
console.print(f"\nUnresolved domains: {len(unresolved_domains)}")
2386
-
for domain in sorted(list(unresolved_domains)[:10]):
2387
-
console.print(f" {domain}")
2388
-
if len(unresolved_domains) > 10:
2389
-
console.print(f" ... and {len(unresolved_domains) - 10} more")
2391
-
except Exception as e:
2392
-
console.print(f"[red]Error building reference index: {e}[/red]")
2394
-
console.print_exception()
2395
-
raise typer.Exit(1)
2400
-
config_file: Optional[Path] = typer.Option(
2404
-
help="Path to configuration file",
2406
-
index_file: Optional[Path] = typer.Option(
2410
-
help="Path to reference index file (default: links.json in git store)",
2412
-
username: Optional[str] = typer.Option(
2416
-
help="Show threads for specific username only",
2418
-
entry_id: Optional[str] = typer.Option(
2422
-
help="Show thread for specific entry ID",
2424
-
min_size: int = typer.Option(
2428
-
help="Minimum thread size to display",
2431
-
"""Show threaded view of related blog entries.
2433
-
This command uses the reference index to show which blog entries
2434
-
are connected through cross-references, creating an email-style
2435
-
threaded view of the conversation.
2437
-
Reads reference data from the unified links.json file.
2440
-
# Load configuration
2441
-
config = load_config(config_file)
2443
-
# Determine index file path
2445
-
index_path = index_file
2447
-
index_path = config.git_store / "links.json"
2449
-
if not index_path.exists():
2450
-
console.print(f"[red]Links file not found: {index_path}[/red]")
2451
-
console.print("Run 'thicket links' and 'thicket index' first to build the reference index")
2452
-
raise typer.Exit(1)
2454
-
# Load unified data
2455
-
with open(index_path) as f:
2456
-
unified_data = json.load(f)
2458
-
# Check if references exist in the unified structure
2459
-
if "references" not in unified_data:
2460
-
console.print(f"[red]No references found in {index_path}[/red]")
2461
-
console.print("Run 'thicket index' first to build the reference index")
2462
-
raise typer.Exit(1)
2464
-
# Extract reference data and reconstruct ReferenceIndex
2465
-
ref_index = ReferenceIndex.from_dict({
2466
-
"references": unified_data["references"],
2467
-
"user_domains": unified_data.get("user_domains", {})
2470
-
# Initialize Git store to get entry details
2471
-
git_store = GitStore(config.git_store)
2473
-
if entry_id and username:
2474
-
# Show specific thread
2475
-
thread_members = ref_index.get_thread_members(username, entry_id)
2476
-
_display_thread(thread_members, ref_index, git_store, f"Thread for {username}:{entry_id}")
2479
-
# Show all threads involving this user
2480
-
user_index = git_store._load_index()
2481
-
user = user_index.get_user(username)
2483
-
console.print(f"[red]User not found: {username}[/red]")
2484
-
raise typer.Exit(1)
2486
-
entries = git_store.list_entries(username)
2487
-
threads_found = set()
2489
-
console.print(f"[bold]Threads involving {username}:[/bold]\n")
2491
-
for entry in entries:
2492
-
thread_members = ref_index.get_thread_members(username, entry.id)
2493
-
if len(thread_members) >= min_size:
2494
-
thread_key = tuple(sorted(thread_members))
2495
-
if thread_key not in threads_found:
2496
-
threads_found.add(thread_key)
2497
-
_display_thread(thread_members, ref_index, git_store, f"Thread #{len(threads_found)}")
2500
-
# Show all threads
2501
-
console.print("[bold]All conversation threads:[/bold]\n")
2503
-
all_threads = set()
2504
-
processed_entries = set()
2507
-
user_index = git_store._load_index()
2508
-
for username in user_index.users.keys():
2509
-
entries = git_store.list_entries(username)
2510
-
for entry in entries:
2511
-
entry_key = (username, entry.id)
2512
-
if entry_key in processed_entries:
2515
-
thread_members = ref_index.get_thread_members(username, entry.id)
2516
-
if len(thread_members) >= min_size:
2517
-
thread_key = tuple(sorted(thread_members))
2518
-
if thread_key not in all_threads:
2519
-
all_threads.add(thread_key)
2520
-
_display_thread(thread_members, ref_index, git_store, f"Thread #{len(all_threads)}")
2522
-
# Mark all members as processed
2523
-
for member in thread_members:
2524
-
processed_entries.add(member)
2526
-
if not all_threads:
2527
-
console.print("[yellow]No conversation threads found[/yellow]")
2528
-
console.print(f"(minimum thread size: {min_size})")
2530
-
except Exception as e:
2531
-
console.print(f"[red]Error showing threads: {e}[/red]")
2532
-
raise typer.Exit(1)
2535
-
def _display_thread(thread_members, ref_index, git_store, title):
    """Display a single conversation thread as an indented tree.

    Args:
        thread_members: Iterable of (username, entry_id) tuples in the thread.
        ref_index: ReferenceIndex used to look up cross-references per entry.
        git_store: GitStore used to resolve entry metadata.
        title: Heading printed above the thread.
    """
    console.print(f"[bold cyan]{title}[/bold cyan]")
    console.print(f"Thread size: {len(thread_members)} entries")

    # Resolve each member to its full entry; skip members whose entry is
    # missing from the store so a stale index does not crash the display.
    thread_entries = []
    for member_username, entry_id in thread_members:
        entry = git_store.get_entry(member_username, entry_id)
        if entry:
            thread_entries.append((member_username, entry))

    # Oldest first; fall back to the updated timestamp when published is unset.
    thread_entries.sort(key=lambda item: item[1].published or item[1].updated)

    for i, (member_username, entry) in enumerate(thread_entries):
        # Tree-drawing glyphs (repaired from garbled source encoding —
        # TODO confirm exact glyphs against the original repository file).
        prefix = "├─" if i < len(thread_entries) - 1 else "└─"

        # Reference counts for this entry, shown only when non-empty.
        outbound = ref_index.get_outbound_refs(member_username, entry.id)
        inbound = ref_index.get_inbound_refs(member_username, entry.id)
        ref_info = f" ({len(outbound)} out, {len(inbound)} in)" if outbound or inbound else ""

        console.print(f" {prefix} [{member_username}] {entry.title[:60]}...{ref_info}")
        if entry.published:
            console.print(f" Published: {entry.published.strftime('%Y-%m-%d')}")

    console.print()  # Empty line after each thread
2570
-
<file path="src/thicket/cli/commands/info_cmd.py">
2571
-
"""CLI command for displaying detailed information about a specific atom entry."""
2574
-
from pathlib import Path
2575
-
from typing import Optional
2578
-
from rich.console import Console
2579
-
from rich.panel import Panel
2580
-
from rich.table import Table
2581
-
from rich.text import Text
2583
-
from ...core.git_store import GitStore
2584
-
from ...core.reference_parser import ReferenceIndex
2585
-
from ..main import app
2586
-
from ..utils import load_config, get_tsv_mode
2588
-
console = Console()
2593
-
identifier: str = typer.Argument(
2595
-
help="The atom ID or URL of the entry to display information about"
2597
-
username: Optional[str] = typer.Option(
2601
-
help="Username to search for the entry (if not provided, searches all users)"
2603
-
config_file: Optional[Path] = typer.Option(
2604
-
Path("thicket.yaml"),
2607
-
help="Path to configuration file",
2609
-
show_content: bool = typer.Option(
2612
-
help="Include the full content of the entry in the output"
2615
-
"""Display detailed information about a specific atom entry.
2617
-
You can specify the entry using either its atom ID or URL.
2618
-
Shows all metadata for the given entry, including title, dates, categories,
2619
-
and summarizes all inbound and outbound links to/from other posts.
2622
-
# Load configuration
2623
-
config = load_config(config_file)
2625
-
# Initialize Git store
2626
-
git_store = GitStore(config.git_store)
2630
-
found_username = None
2632
-
# Check if identifier looks like a URL
2633
-
is_url = identifier.startswith(('http://', 'https://'))
2636
-
# Search specific username
2639
-
entries = git_store.list_entries(username)
2641
-
if str(e.link) == identifier:
2643
-
found_username = username
2646
-
# Search by atom ID
2647
-
entry = git_store.get_entry(username, identifier)
2649
-
found_username = username
2651
-
# Search all users
2652
-
index = git_store._load_index()
2653
-
for user in index.users.keys():
2656
-
entries = git_store.list_entries(user)
2658
-
if str(e.link) == identifier:
2660
-
found_username = user
2665
-
# Search by atom ID
2666
-
entry = git_store.get_entry(user, identifier)
2668
-
found_username = user
2671
-
if not entry or not found_username:
2673
-
console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found for user '{username}'[/red]")
2675
-
console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found in any user's entries[/red]")
2676
-
raise typer.Exit(1)
2678
-
# Load reference index if available
2679
-
links_path = config.git_store / "links.json"
2681
-
if links_path.exists():
2682
-
with open(links_path) as f:
2683
-
unified_data = json.load(f)
2685
-
# Check if references exist in the unified structure
2686
-
if "references" in unified_data:
2687
-
ref_index = ReferenceIndex.from_dict({
2688
-
"references": unified_data["references"],
2689
-
"user_domains": unified_data.get("user_domains", {})
2692
-
# Display information
2693
-
if get_tsv_mode():
2694
-
_display_entry_info_tsv(entry, found_username, ref_index, show_content)
2696
-
_display_entry_info(entry, found_username)
2699
-
_display_link_info(entry, found_username, ref_index)
2701
-
console.print("\n[yellow]No reference index found. Run 'thicket links' and 'thicket index' to build cross-reference data.[/yellow]")
2703
-
# Optionally display content
2704
-
if show_content and entry.content:
2705
-
_display_content(entry.content)
2707
-
except Exception as e:
2708
-
console.print(f"[red]Error displaying entry info: {e}[/red]")
2709
-
raise typer.Exit(1)
2712
-
def _display_entry_info(entry, username: str) -> None:
2713
-
"""Display basic entry information in a structured format."""
2715
-
# Create main info panel
2716
-
info_table = Table.grid(padding=(0, 2))
2717
-
info_table.add_column("Field", style="cyan bold", width=15)
2718
-
info_table.add_column("Value", style="white")
2720
-
info_table.add_row("User", f"[green]{username}[/green]")
2721
-
info_table.add_row("Atom ID", f"[blue]{entry.id}[/blue]")
2722
-
info_table.add_row("Title", entry.title)
2723
-
info_table.add_row("Link", str(entry.link))
2725
-
if entry.published:
2726
-
info_table.add_row("Published", entry.published.strftime("%Y-%m-%d %H:%M:%S UTC"))
2728
-
info_table.add_row("Updated", entry.updated.strftime("%Y-%m-%d %H:%M:%S UTC"))
2731
-
# Truncate long summaries
2732
-
summary = entry.summary[:200] + "..." if len(entry.summary) > 200 else entry.summary
2733
-
info_table.add_row("Summary", summary)
2735
-
if entry.categories:
2736
-
categories_text = ", ".join(entry.categories)
2737
-
info_table.add_row("Categories", categories_text)
2741
-
if "name" in entry.author:
2742
-
author_info.append(entry.author["name"])
2743
-
if "email" in entry.author:
2744
-
author_info.append(f"<{entry.author['email']}>")
2746
-
info_table.add_row("Author", " ".join(author_info))
2748
-
if entry.content_type:
2749
-
info_table.add_row("Content Type", entry.content_type)
2752
-
info_table.add_row("Rights", entry.rights)
2755
-
info_table.add_row("Source Feed", entry.source)
2759
-
title=f"[bold]Entry Information[/bold]",
2760
-
border_style="blue"
2763
-
console.print(panel)
2766
-
def _display_link_info(entry, username: str, ref_index: ReferenceIndex) -> None:
    """Display inbound and outbound cross-references for an entry.

    Args:
        entry: The AtomEntry whose links are shown.
        username: Owner of the entry (used as the reference-index key).
        ref_index: ReferenceIndex holding the cross-reference data.
    """
    outbound_refs = ref_index.get_outbound_refs(username, entry.id)
    inbound_refs = ref_index.get_inbound_refs(username, entry.id)

    if not outbound_refs and not inbound_refs:
        console.print("\n[dim]No cross-references found for this entry.[/dim]")
        return

    # Create links table
    links_table = Table(title="Cross-References")
    links_table.add_column("Direction", style="cyan", width=10)
    links_table.add_column("Target/Source", style="green", width=20)
    links_table.add_column("URL", style="blue", width=50)

    # Outbound references: target may be an untracked external site.
    # Arrow glyphs repaired from garbled source encoding — TODO confirm.
    for ref in outbound_refs:
        target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
        links_table.add_row("→ Out", target_info, ref.target_url)

    # Inbound references: source is always a tracked user/entry.
    for ref in inbound_refs:
        source_info = f"{ref.source_username}:{ref.source_entry_id}"
        links_table.add_row("← In", source_info, ref.target_url)

    console.print(links_table)
    console.print(f"\n[bold]Summary:[/bold] {len(outbound_refs)} outbound, {len(inbound_refs)} inbound references")
2800
-
def _display_content(content: str) -> None:
2801
-
"""Display the full content of the entry."""
2803
-
# Truncate very long content
2804
-
display_content = content
2805
-
if len(content) > 5000:
2806
-
display_content = content[:5000] + "\n\n[... content truncated ...]"
2810
-
title="[bold]Entry Content[/bold]",
2811
-
border_style="green",
2816
-
console.print(panel)
2819
-
def _display_entry_info_tsv(entry, username: str, ref_index: Optional[ReferenceIndex], show_content: bool) -> None:
2820
-
"""Display entry information in TSV format."""
2823
-
print("Field\tValue")
2824
-
print(f"User\t{username}")
2825
-
print(f"Atom ID\t{entry.id}")
2826
-
print(f"Title\t{entry.title.replace(chr(9), ' ').replace(chr(10), ' ').replace(chr(13), ' ')}")
2827
-
print(f"Link\t{entry.link}")
2829
-
if entry.published:
2830
-
print(f"Published\t{entry.published.strftime('%Y-%m-%d %H:%M:%S UTC')}")
2832
-
print(f"Updated\t{entry.updated.strftime('%Y-%m-%d %H:%M:%S UTC')}")
2835
-
# Escape tabs and newlines in summary
2836
-
summary = entry.summary.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
2837
-
print(f"Summary\t{summary}")
2839
-
if entry.categories:
2840
-
print(f"Categories\t{', '.join(entry.categories)}")
2844
-
if "name" in entry.author:
2845
-
author_info.append(entry.author["name"])
2846
-
if "email" in entry.author:
2847
-
author_info.append(f"<{entry.author['email']}>")
2849
-
print(f"Author\t{' '.join(author_info)}")
2851
-
if entry.content_type:
2852
-
print(f"Content Type\t{entry.content_type}")
2855
-
print(f"Rights\t{entry.rights}")
2858
-
print(f"Source Feed\t{entry.source}")
2860
-
# Add reference info if available
2862
-
outbound_refs = ref_index.get_outbound_refs(username, entry.id)
2863
-
inbound_refs = ref_index.get_inbound_refs(username, entry.id)
2865
-
print(f"Outbound References\t{len(outbound_refs)}")
2866
-
print(f"Inbound References\t{len(inbound_refs)}")
2868
-
# Show each reference
2869
-
for ref in outbound_refs:
2870
-
target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
2871
-
print(f"Outbound Reference\t{target_info}\t{ref.target_url}")
2873
-
for ref in inbound_refs:
2874
-
source_info = f"{ref.source_username}:{ref.source_entry_id}"
2875
-
print(f"Inbound Reference\t{source_info}\t{ref.target_url}")
2877
-
# Show content if requested
2878
-
if show_content and entry.content:
2879
-
# Escape tabs and newlines in content
2880
-
content = entry.content.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
2881
-
print(f"Content\t{content}")
2884
-
<file path="src/thicket/cli/commands/init.py">
2885
-
"""Initialize command for thicket."""
2887
-
from pathlib import Path
2888
-
from typing import Optional
2891
-
from pydantic import ValidationError
2893
-
from ...core.git_store import GitStore
2894
-
from ...models import ThicketConfig
2895
-
from ..main import app
2896
-
from ..utils import print_error, print_success, save_config
2901
-
git_store: Path = typer.Argument(..., help="Path to Git repository for storing feeds"),
2902
-
cache_dir: Optional[Path] = typer.Option(
2903
-
None, "--cache-dir", "-c", help="Cache directory (default: ~/.cache/thicket)"
2905
-
config_file: Optional[Path] = typer.Option(
2906
-
None, "--config", help="Configuration file path (default: thicket.yaml)"
2908
-
force: bool = typer.Option(
2909
-
False, "--force", "-f", help="Overwrite existing configuration"
2912
-
"""Initialize a new thicket configuration and Git store."""
2914
-
# Set default paths
2915
-
if cache_dir is None:
2916
-
from platformdirs import user_cache_dir
2917
-
cache_dir = Path(user_cache_dir("thicket"))
2919
-
if config_file is None:
2920
-
config_file = Path("thicket.yaml")
2922
-
# Check if config already exists
2923
-
if config_file.exists() and not force:
2924
-
print_error(f"Configuration file already exists: {config_file}")
2925
-
print_error("Use --force to overwrite")
2926
-
raise typer.Exit(1)
2928
-
# Create cache directory
2929
-
cache_dir.mkdir(parents=True, exist_ok=True)
2931
-
# Create Git store
2933
-
GitStore(git_store)
2934
-
print_success(f"Initialized Git store at: {git_store}")
2935
-
except Exception as e:
2936
-
print_error(f"Failed to initialize Git store: {e}")
2937
-
raise typer.Exit(1) from e
2939
-
# Create configuration
2941
-
config = ThicketConfig(
2942
-
git_store=git_store,
2943
-
cache_dir=cache_dir,
2947
-
save_config(config, config_file)
2948
-
print_success(f"Created configuration file: {config_file}")
2950
-
except ValidationError as e:
2951
-
print_error(f"Invalid configuration: {e}")
2952
-
raise typer.Exit(1) from e
2953
-
except Exception as e:
2954
-
print_error(f"Failed to create configuration: {e}")
2955
-
raise typer.Exit(1) from e
2957
-
print_success("Thicket initialized successfully!")
2958
-
print_success(f"Git store: {git_store}")
2959
-
print_success(f"Cache directory: {cache_dir}")
2960
-
print_success(f"Configuration: {config_file}")
2961
-
print_success("Run 'thicket add user' to add your first user and feed.")
2964
-
<file path="src/thicket/cli/__init__.py">
2965
-
"""CLI interface for thicket."""
2967
-
from .main import app
2972
-
<file path="src/thicket/core/__init__.py">
2973
-
"""Core business logic for thicket."""
2975
-
from .feed_parser import FeedParser
2976
-
from .git_store import GitStore
2978
-
__all__ = ["FeedParser", "GitStore"]
2981
-
<file path="src/thicket/core/feed_parser.py">
2982
-
"""Feed parsing and normalization with auto-discovery."""
2984
-
from datetime import datetime
2985
-
from typing import Optional
2986
-
from urllib.parse import urlparse
2991
-
from pydantic import HttpUrl, ValidationError
2993
-
from ..models import AtomEntry, FeedMetadata
2997
-
"""Parser for RSS/Atom feeds with normalization and auto-discovery."""
2999
-
def __init__(self, user_agent: str = "thicket/0.1.0"):
3000
-
"""Initialize the feed parser."""
3001
-
self.user_agent = user_agent
3002
-
self.allowed_tags = [
3003
-
"a", "abbr", "acronym", "b", "blockquote", "br", "code", "em",
3004
-
"i", "li", "ol", "p", "pre", "strong", "ul", "h1", "h2", "h3",
3005
-
"h4", "h5", "h6", "img", "div", "span",
3007
-
self.allowed_attributes = {
3008
-
"a": ["href", "title"],
3009
-
"abbr": ["title"],
3010
-
"acronym": ["title"],
3011
-
"img": ["src", "alt", "title", "width", "height"],
3012
-
"blockquote": ["cite"],
3015
-
async def fetch_feed(self, url: HttpUrl) -> str:
3016
-
"""Fetch feed content from URL."""
3017
-
async with httpx.AsyncClient() as client:
3018
-
response = await client.get(
3020
-
headers={"User-Agent": self.user_agent},
3022
-
follow_redirects=True,
3024
-
response.raise_for_status()
3025
-
return response.text
3027
-
def parse_feed(self, content: str, source_url: Optional[HttpUrl] = None) -> tuple[FeedMetadata, list[AtomEntry]]:
3028
-
"""Parse feed content and return metadata and entries."""
3029
-
parsed = feedparser.parse(content)
3031
-
if parsed.bozo and parsed.bozo_exception:
3032
-
# Try to continue with potentially malformed feed
3035
-
# Extract feed metadata
3036
-
feed_meta = self._extract_feed_metadata(parsed.feed)
3038
-
# Extract and normalize entries
3040
-
for entry in parsed.entries:
3042
-
atom_entry = self._normalize_entry(entry, source_url)
3043
-
entries.append(atom_entry)
3044
-
except Exception as e:
3045
-
# Log error but continue processing other entries
3046
-
print(f"Error processing entry {getattr(entry, 'id', 'unknown')}: {e}")
3049
-
return feed_meta, entries
3051
-
def _extract_feed_metadata(self, feed: feedparser.FeedParserDict) -> FeedMetadata:
3052
-
"""Extract metadata from feed for auto-discovery."""
3053
-
# Parse author information
3054
-
author_name = None
3055
-
author_email = None
3058
-
if hasattr(feed, 'author_detail'):
3059
-
author_name = feed.author_detail.get('name')
3060
-
author_email = feed.author_detail.get('email')
3061
-
author_uri = feed.author_detail.get('href')
3062
-
elif hasattr(feed, 'author'):
3063
-
author_name = feed.author
3065
-
# Parse managing editor for RSS feeds
3066
-
if not author_email and hasattr(feed, 'managingEditor'):
3067
-
author_email = feed.managingEditor
3071
-
if hasattr(feed, 'link'):
3073
-
feed_link = HttpUrl(feed.link)
3074
-
except ValidationError:
3077
-
# Parse image/icon/logo
3082
-
if hasattr(feed, 'image'):
3084
-
image_url = HttpUrl(feed.image.get('href', feed.image.get('url', '')))
3085
-
except (ValidationError, AttributeError):
3088
-
if hasattr(feed, 'icon'):
3090
-
icon = HttpUrl(feed.icon)
3091
-
except ValidationError:
3094
-
if hasattr(feed, 'logo'):
3096
-
logo = HttpUrl(feed.logo)
3097
-
except ValidationError:
3100
-
return FeedMetadata(
3101
-
title=getattr(feed, 'title', None),
3102
-
author_name=author_name,
3103
-
author_email=author_email,
3104
-
author_uri=HttpUrl(author_uri) if author_uri else None,
3108
-
image_url=image_url,
3109
-
description=getattr(feed, 'description', None),
3112
-
def _normalize_entry(self, entry: feedparser.FeedParserDict, source_url: Optional[HttpUrl] = None) -> AtomEntry:
3113
-
"""Normalize an entry to Atom format."""
3114
-
# Parse timestamps
3115
-
updated = self._parse_timestamp(entry.get('updated_parsed') or entry.get('published_parsed'))
3116
-
published = self._parse_timestamp(entry.get('published_parsed'))
3119
-
content = self._extract_content(entry)
3120
-
content_type = self._extract_content_type(entry)
3123
-
author = self._extract_author(entry)
3125
-
# Parse categories/tags
3127
-
if hasattr(entry, 'tags'):
3128
-
categories = [tag.get('term', '') for tag in entry.tags if tag.get('term')]
3130
-
# Sanitize HTML content
3132
-
content = self._sanitize_html(content)
3134
-
summary = entry.get('summary', '')
3136
-
summary = self._sanitize_html(summary)
3139
-
id=entry.get('id', entry.get('link', '')),
3140
-
title=entry.get('title', ''),
3141
-
link=HttpUrl(entry.get('link', '')),
3143
-
published=published,
3144
-
summary=summary or None,
3145
-
content=content or None,
3146
-
content_type=content_type,
3148
-
categories=categories,
3149
-
rights=entry.get('rights', None),
3150
-
source=str(source_url) if source_url else None,
3153
-
def _parse_timestamp(self, time_struct) -> datetime:
3154
-
"""Parse feedparser time struct to datetime."""
3156
-
return datetime(*time_struct[:6])
3157
-
return datetime.now()
3159
-
def _extract_content(self, entry: feedparser.FeedParserDict) -> Optional[str]:
3160
-
"""Extract the best content from an entry."""
3161
-
# Prefer content over summary
3162
-
if hasattr(entry, 'content') and entry.content:
3163
-
# Find the best content (prefer text/html, then text/plain)
3164
-
for content_item in entry.content:
3165
-
if content_item.get('type') in ['text/html', 'html']:
3166
-
return content_item.get('value', '')
3167
-
elif content_item.get('type') in ['text/plain', 'text']:
3168
-
return content_item.get('value', '')
3169
-
# Fallback to first content item
3170
-
return entry.content[0].get('value', '')
3172
-
# Fallback to summary
3173
-
return entry.get('summary', '')
3175
-
def _extract_content_type(self, entry: feedparser.FeedParserDict) -> str:
3176
-
"""Extract content type from entry."""
3177
-
if hasattr(entry, 'content') and entry.content:
3178
-
content_type = entry.content[0].get('type', 'html')
3179
-
# Normalize content type
3180
-
if content_type in ['text/html', 'html']:
3182
-
elif content_type in ['text/plain', 'text']:
3184
-
elif content_type == 'xhtml':
3188
-
def _extract_author(self, entry: feedparser.FeedParserDict) -> Optional[dict]:
3189
-
"""Extract author information from entry."""
3192
-
if hasattr(entry, 'author_detail'):
3194
-
'name': entry.author_detail.get('name'),
3195
-
'email': entry.author_detail.get('email'),
3196
-
'uri': entry.author_detail.get('href'),
3198
-
elif hasattr(entry, 'author'):
3199
-
author['name'] = entry.author
3201
-
return author if author else None
3203
-
def _sanitize_html(self, html: str) -> str:
3204
-
"""Sanitize HTML content to prevent XSS."""
3205
-
return bleach.clean(
3207
-
tags=self.allowed_tags,
3208
-
attributes=self.allowed_attributes,
3212
-
def sanitize_entry_id(self, entry_id: str) -> str:
3213
-
"""Sanitize entry ID to be a safe filename."""
3214
-
# Parse URL to get meaningful parts
3215
-
parsed = urlparse(entry_id)
3217
-
# Start with the path component
3219
-
# Remove leading slash and replace problematic characters
3220
-
safe_id = parsed.path.lstrip('/').replace('/', '_').replace('\\', '_')
3222
-
# Use the entire ID as fallback
3223
-
safe_id = entry_id
3225
-
# Replace problematic characters
3227
-
for char in safe_id:
3228
-
if char.isalnum() or char in '-_.':
3229
-
safe_chars.append(char)
3231
-
safe_chars.append('_')
3233
-
safe_id = ''.join(safe_chars)
3235
-
# Ensure it's not too long (max 200 chars)
3236
-
if len(safe_id) > 200:
3237
-
safe_id = safe_id[:200]
3239
-
# Ensure it's not empty
3246
-
<file path="src/thicket/core/reference_parser.py">
3247
-
"""Reference detection and parsing for blog entries."""
3250
-
from typing import Optional
3251
-
from urllib.parse import urlparse
3253
-
from ..models import AtomEntry
3256
-
class BlogReference:
    """Represents a reference from one blog entry to another.

    A reference records that the entry ``source_username:source_entry_id``
    links to ``target_url``. When the target URL was resolved to a tracked
    user, ``target_username`` and ``target_entry_id`` identify the target
    entry; otherwise they stay ``None`` (an external link).
    """

    def __init__(
        self,
        source_entry_id: str,
        source_username: str,
        target_url: str,
        target_username: Optional[str] = None,
        target_entry_id: Optional[str] = None,
    ):
        self.source_entry_id = source_entry_id
        self.source_username = source_username
        self.target_url = target_url
        self.target_username = target_username
        self.target_entry_id = target_entry_id

    def __repr__(self) -> str:
        # Debug aid only; nothing at runtime relies on this format.
        return (
            f"{type(self).__name__}({self.source_username}:{self.source_entry_id}"
            f" -> {self.target_url})"
        )

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        result = {
            "source_entry_id": self.source_entry_id,
            "source_username": self.source_username,
            "target_url": self.target_url,
        }
        # Only include optional fields if they are not None
        if self.target_username is not None:
            result["target_username"] = self.target_username
        if self.target_entry_id is not None:
            result["target_entry_id"] = self.target_entry_id
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "BlogReference":
        """Create from dictionary (inverse of :meth:`to_dict`)."""
        return cls(
            source_entry_id=data["source_entry_id"],
            source_username=data["source_username"],
            target_url=data["target_url"],
            target_username=data.get("target_username"),
            target_entry_id=data.get("target_entry_id"),
        )
3301
-
class ReferenceIndex:
    """Index of blog-to-blog references for creating threaded views."""

    def __init__(self):
        # Flat list of every reference that has been indexed.
        self.references: list[BlogReference] = []
        # "username:entry_id" -> references leaving that entry.
        self.outbound_refs: dict[str, list[BlogReference]] = {}
        # "username:entry_id" -> references pointing at that entry.
        self.inbound_refs: dict[str, list[BlogReference]] = {}
        # username -> set of domains known to belong to that user.
        self.user_domains: dict[str, set[str]] = {}

    def add_reference(self, ref: BlogReference) -> None:
        """Add a reference to the index, updating both direction maps."""
        self.references.append(ref)

        # Outbound side is always known (source entry is where we found it).
        source_key = f"{ref.source_username}:{ref.source_entry_id}"
        self.outbound_refs.setdefault(source_key, []).append(ref)

        # Inbound side only exists when the target resolved to a tracked entry.
        if ref.target_username and ref.target_entry_id:
            target_key = f"{ref.target_username}:{ref.target_entry_id}"
            self.inbound_refs.setdefault(target_key, []).append(ref)

    def get_outbound_refs(self, username: str, entry_id: str) -> list[BlogReference]:
        """Get all outbound references from an entry."""
        return self.outbound_refs.get(f"{username}:{entry_id}", [])

    def get_inbound_refs(self, username: str, entry_id: str) -> list[BlogReference]:
        """Get all inbound references to an entry."""
        return self.inbound_refs.get(f"{username}:{entry_id}", [])

    def get_thread_members(self, username: str, entry_id: str) -> set[tuple[str, str]]:
        """Get all entries that are part of the same thread.

        Traverses the reference graph in both directions (outbound and
        inbound) from the starting entry and returns every reachable
        ``(username, entry_id)`` pair, including the start itself.
        """
        visited: set[tuple[str, str]] = set()
        to_visit = [(username, entry_id)]
        thread_members: set[tuple[str, str]] = set()

        while to_visit:
            current_user, current_entry = to_visit.pop()
            if (current_user, current_entry) in visited:
                continue

            visited.add((current_user, current_entry))
            thread_members.add((current_user, current_entry))

            # Follow outbound references whose target resolved to a user.
            for ref in self.get_outbound_refs(current_user, current_entry):
                if ref.target_username and ref.target_entry_id:
                    to_visit.append((ref.target_username, ref.target_entry_id))

            # Follow inbound references (their source is always known).
            for ref in self.get_inbound_refs(current_user, current_entry):
                to_visit.append((ref.source_username, ref.source_entry_id))

        return thread_members

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "references": [ref.to_dict() for ref in self.references],
            "user_domains": {k: list(v) for k, v in self.user_domains.items()},
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ReferenceIndex":
        """Create from dictionary, rebuilding both direction maps."""
        index = cls()
        for ref_data in data.get("references", []):
            index.add_reference(BlogReference.from_dict(ref_data))
        for username, domains in data.get("user_domains", {}).items():
            index.user_domains[username] = set(domains)
        return index
3387
-
class ReferenceParser:
3388
-
"""Parses blog entries to detect references to other blogs."""
3390
-
def __init__(self):
3391
-
# Common blog platforms and patterns
3392
-
self.blog_patterns = [
3393
-
r"https?://[^/]+\.(?:org|com|net|io|dev|me|co\.uk)/.*", # Common blog domains
3394
-
r"https?://[^/]+\.github\.io/.*", # GitHub Pages
3395
-
r"https?://[^/]+\.substack\.com/.*", # Substack
3396
-
r"https?://medium\.com/.*", # Medium
3397
-
r"https?://[^/]+\.wordpress\.com/.*", # WordPress.com
3398
-
r"https?://[^/]+\.blogspot\.com/.*", # Blogger
3401
-
# Compile regex patterns
3402
-
self.link_pattern = re.compile(
3403
-
r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL
3405
-
self.url_pattern = re.compile(r'https?://[^\s<>"]+')
3407
-
def extract_links_from_html(self, html_content: str) -> list[tuple[str, str]]:
    """Extract all (url, link-text) pairs from anchor tags in HTML content."""
    links = []
    # self.link_pattern captures the href value and the anchor's inner HTML.
    for anchor in self.link_pattern.finditer(html_content):
        href = anchor.group(1)
        # Strip any nested markup from the anchor's inner text.
        label = re.sub(r"<[^>]+>", "", anchor.group(2)).strip()
        links.append((href, label))
    return links
3421
-
def is_blog_url(self, url: str) -> bool:
    """Check if a URL likely points to a blog post.

    True when the URL matches any of the configured blog-platform patterns.
    """
    return any(re.match(pattern, url) for pattern in self.blog_patterns)
3428
-
def _is_likely_blog_post_url(self, url: str) -> bool:
3429
-
"""Check if a same-domain URL likely points to a blog post (not CSS, images, etc.)."""
3430
-
parsed_url = urlparse(url)
3431
-
path = parsed_url.path.lower()
3433
-
# Skip obvious non-blog content
3434
-
if any(path.endswith(ext) for ext in ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.pdf', '.xml', '.json']):
3437
-
# Skip common non-blog paths
3438
-
if any(segment in path for segment in ['/static/', '/assets/', '/css/', '/js/', '/images/', '/img/', '/media/', '/uploads/']):
3441
-
# Skip fragment-only links (same page anchors)
3442
-
if not path or path == '/':
3445
-
# Look for positive indicators of blog posts
3446
-
# Common blog post patterns: dates, slugs, post indicators
3447
-
blog_indicators = [
3448
-
r'/\d{4}/', # Year in path
3449
-
r'/\d{4}/\d{2}/', # Year/month in path
3459
-
for pattern in blog_indicators:
3460
-
if re.search(pattern, path):
3463
-
# If it has a reasonable path depth and doesn't match exclusions, likely a blog post
3464
-
path_segments = [seg for seg in path.split('/') if seg]
3465
-
return len(path_segments) >= 1 # At least one meaningful path segment
3467
-
def resolve_target_user(
    self, url: str, user_domains: dict[str, set[str]]
) -> Optional[str]:
    """Try to resolve a URL to a known user based on domain mapping."""
    host = urlparse(url).netloc.lower()
    # First user whose registered domain set contains the URL's host wins;
    # None when the host belongs to no known user.
    return next(
        (user for user, domains in user_domains.items() if host in domains),
        None,
    )
3480
-
def extract_references(
    self, entry: AtomEntry, username: str, user_domains: dict[str, set[str]]
) -> list[BlogReference]:
    """Extract all blog references from an entry.

    Scans the entry's HTML content and summary for anchor links, keeps
    only links that look like blog posts, and resolves each link's target
    user where possible.
    """
    references: list[BlogReference] = []

    # The entry's own domain is loop-invariant: compute it once up front
    # instead of re-parsing entry.link for every discovered link.
    entry_domain = urlparse(str(entry.link)).netloc.lower() if entry.link else ""

    # Combine all text content for analysis.
    content_to_search = []
    if entry.content:
        content_to_search.append(entry.content)
    if entry.summary:
        content_to_search.append(entry.summary)

    for content in content_to_search:
        for url, _link_text in self.extract_links_from_html(content):
            # Check if this looks like a blog URL at all.
            if not self.is_blog_url(url):
                continue

            link_domain = urlparse(url).netloc.lower()
            same_domain = link_domain == entry_domain

            # For same-domain links, apply additional filtering to avoid
            # non-blog content (stylesheets, images, anchors, ...).
            if same_domain and not self._is_likely_blog_post_url(url):
                continue

            # Same domain means the author linked to their own blog;
            # otherwise try to map the domain to a known user.
            target_username: Optional[str] = (
                username if same_domain else self.resolve_target_user(url, user_domains)
            )

            references.append(
                BlogReference(
                    source_entry_id=entry.id,
                    source_username=username,
                    target_url=url,
                    target_username=target_username,
                    target_entry_id=None,  # Will be resolved later if possible
                )
            )

    return references
3532
-
def build_user_domain_mapping(self, git_store: "GitStore") -> dict[str, set[str]]:
    """Build mapping of usernames to their known domains."""
    index = git_store._load_index()

    mapping: dict[str, set[str]] = {}
    for username, user_metadata in index.users.items():
        domains: set[str] = set()

        # Collect the domain of every registered feed URL.
        for feed_url in user_metadata.feeds:
            host = urlparse(feed_url).netloc.lower()
            if host:
                domains.add(host)

        # Also include the homepage domain, when configured.
        if user_metadata.homepage:
            host = urlparse(str(user_metadata.homepage)).netloc.lower()
            if host:
                domains.add(host)

        mapping[username] = domains

    return mapping
3556
-
def _build_url_to_entry_mapping(self, git_store: "GitStore") -> dict[str, str]:
3557
-
"""Build a comprehensive mapping from URLs to entry IDs using git store data.
3559
-
This creates a bidirectional mapping that handles:
3560
-
- Entry link URLs -> Entry IDs
3561
-
- URL variations (with/without www, http/https)
3562
-
- Multiple URLs pointing to the same entry
3564
-
url_to_entry: dict[str, str] = {}
3566
-
# Load index to get all users
3567
-
index = git_store._load_index()
3569
-
for username in index.users.keys():
3570
-
entries = git_store.list_entries(username)
3572
-
for entry in entries:
3574
-
link_url = str(entry.link)
3575
-
entry_id = entry.id
3577
-
# Map the canonical link URL
3578
-
url_to_entry[link_url] = entry_id
3580
-
# Handle common URL variations
3581
-
parsed = urlparse(link_url)
3582
-
if parsed.netloc and parsed.path:
3583
-
# Add version without www
3584
-
if parsed.netloc.startswith('www.'):
3585
-
no_www_url = f"{parsed.scheme}://{parsed.netloc[4:]}{parsed.path}"
3587
-
no_www_url += f"?{parsed.query}"
3588
-
if parsed.fragment:
3589
-
no_www_url += f"#{parsed.fragment}"
3590
-
url_to_entry[no_www_url] = entry_id
3592
-
# Add version with www if not present
3593
-
elif not parsed.netloc.startswith('www.'):
3594
-
www_url = f"{parsed.scheme}://www.{parsed.netloc}{parsed.path}"
3596
-
www_url += f"?{parsed.query}"
3597
-
if parsed.fragment:
3598
-
www_url += f"#{parsed.fragment}"
3599
-
url_to_entry[www_url] = entry_id
3601
-
# Add http/https variations
3602
-
if parsed.scheme == 'https':
3603
-
http_url = link_url.replace('https://', 'http://', 1)
3604
-
url_to_entry[http_url] = entry_id
3605
-
elif parsed.scheme == 'http':
3606
-
https_url = link_url.replace('http://', 'https://', 1)
3607
-
url_to_entry[https_url] = entry_id
3609
-
return url_to_entry
3611
-
def _normalize_url(self, url: str) -> str:
3612
-
"""Normalize URL for consistent matching.
3614
-
Handles common variations like trailing slashes, fragments, etc.
3616
-
parsed = urlparse(url)
3618
-
# Remove trailing slash from path
3619
-
path = parsed.path.rstrip('/') if parsed.path != '/' else parsed.path
3621
-
# Reconstruct without fragment for consistent matching
3622
-
normalized = f"{parsed.scheme}://{parsed.netloc}{path}"
3624
-
normalized += f"?{parsed.query}"
3628
-
def resolve_target_entry_ids(
    self, references: list[BlogReference], git_store: "GitStore"
) -> list[BlogReference]:
    """Resolve target_entry_id for references using comprehensive URL mapping.

    Returns a new list; references that are already resolved, or that lack
    a target user, are passed through unchanged.
    """
    resolved_refs = []

    # Build the URL -> entry ID mapping once for all references.
    url_to_entry = self._build_url_to_entry_mapping(git_store)

    for ref in references:
        # Already resolved, or unresolvable without a target user.
        if ref.target_entry_id is not None or ref.target_username is None:
            resolved_refs.append(ref)
            continue

        # Cheapest match first: exact URL, then normalized URL, then a full
        # scan comparing normalized forms. Stop as soon as one succeeds
        # (avoids the O(n) scan when a direct lookup already matched).
        resolved_entry_id = url_to_entry.get(ref.target_url)
        if resolved_entry_id is None:
            normalized_target = self._normalize_url(ref.target_url)
            resolved_entry_id = url_to_entry.get(normalized_target)
            if resolved_entry_id is None:
                for mapped_url, entry_id in url_to_entry.items():
                    if self._normalize_url(mapped_url) == normalized_target:
                        resolved_entry_id = entry_id
                        break

        # Verify the resolved entry actually belongs to the target username
        # by loading that user's entries.
        if resolved_entry_id:
            entries = git_store.list_entries(ref.target_username)
            if not any(entry.id == resolved_entry_id for entry in entries):
                resolved_entry_id = None

        # Emit a new reference carrying the resolved target_entry_id.
        resolved_refs.append(
            BlogReference(
                source_entry_id=ref.source_entry_id,
                source_username=ref.source_username,
                target_url=ref.target_url,
                target_username=ref.target_username,
                target_entry_id=resolved_entry_id,
            )
        )

    return resolved_refs
3687
-
<file path="src/thicket/models/__init__.py">
3688
-
"""Data models for thicket."""
3690
-
from .config import ThicketConfig, UserConfig
3691
-
from .feed import AtomEntry, DuplicateMap, FeedMetadata
3692
-
from .user import GitStoreIndex, UserMetadata
3705
-
<file path="src/thicket/models/feed.py">
3706
-
"""Feed and entry models for thicket."""
3708
-
from datetime import datetime
3709
-
from typing import TYPE_CHECKING, Optional
3711
-
from pydantic import BaseModel, ConfigDict, EmailStr, HttpUrl
3714
-
from .config import UserConfig
3717
-
class AtomEntry(BaseModel):
3718
-
"""Represents an Atom feed entry stored in the Git repository."""
3720
-
model_config = ConfigDict(
3721
-
json_encoders={datetime: lambda v: v.isoformat()},
3722
-
str_strip_whitespace=True,
3725
-
id: str # Original Atom ID
3729
-
published: Optional[datetime] = None
3730
-
summary: Optional[str] = None
3731
-
content: Optional[str] = None # Full body content from Atom entry
3732
-
content_type: Optional[str] = "html" # text, html, xhtml
3733
-
author: Optional[dict] = None
3734
-
categories: list[str] = []
3735
-
rights: Optional[str] = None # Copyright info
3736
-
source: Optional[str] = None # Source feed URL
3739
-
class FeedMetadata(BaseModel):
3740
-
"""Metadata extracted from a feed for auto-discovery."""
3742
-
title: Optional[str] = None
3743
-
author_name: Optional[str] = None
3744
-
author_email: Optional[EmailStr] = None
3745
-
author_uri: Optional[HttpUrl] = None
3746
-
link: Optional[HttpUrl] = None
3747
-
logo: Optional[HttpUrl] = None
3748
-
icon: Optional[HttpUrl] = None
3749
-
image_url: Optional[HttpUrl] = None
3750
-
description: Optional[str] = None
3752
-
def to_user_config(self, username: str, feed_url: HttpUrl) -> "UserConfig":
3753
-
"""Convert discovered metadata to UserConfig with fallbacks."""
3754
-
from .config import UserConfig
3756
-
return UserConfig(
3757
-
username=username,
3759
-
display_name=self.author_name or self.title,
3760
-
email=self.author_email,
3761
-
homepage=self.author_uri or self.link,
3762
-
icon=self.logo or self.icon or self.image_url,
3766
-
class DuplicateMap(BaseModel):
    """Maps duplicate entry IDs to canonical entry IDs."""

    # duplicate_id -> canonical_id
    duplicates: dict[str, str] = {}
    comment: str = "Entry IDs that map to the same canonical content"

    def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None:
        """Add a duplicate mapping."""
        self.duplicates[duplicate_id] = canonical_id

    def remove_duplicate(self, duplicate_id: str) -> bool:
        """Remove a duplicate mapping. Returns True if existed."""
        # pop() with a default avoids a separate membership check.
        return self.duplicates.pop(duplicate_id, None) is not None

    def get_canonical(self, entry_id: str) -> str:
        """Get canonical ID for an entry (returns original if not duplicate)."""
        return self.duplicates.get(entry_id, entry_id)

    def is_duplicate(self, entry_id: str) -> bool:
        """Check if entry ID is marked as duplicate."""
        return entry_id in self.duplicates

    def get_duplicates_for_canonical(self, canonical_id: str) -> list[str]:
        """Get all duplicate IDs that map to a canonical ID."""
        return [
            dup_id
            for dup_id, canonical in self.duplicates.items()
            if canonical == canonical_id
        ]
3797
-
<file path="src/thicket/models/user.py">
3798
-
"""User metadata models for thicket."""
3800
-
from datetime import datetime
3801
-
from typing import Optional
3803
-
from pydantic import BaseModel, ConfigDict
3806
-
class UserMetadata(BaseModel):
3807
-
"""Metadata about a user stored in the Git repository."""
3809
-
model_config = ConfigDict(
3810
-
json_encoders={datetime: lambda v: v.isoformat()},
3811
-
str_strip_whitespace=True,
3815
-
display_name: Optional[str] = None
3816
-
email: Optional[str] = None
3817
-
homepage: Optional[str] = None
3818
-
icon: Optional[str] = None
3819
-
feeds: list[str] = []
3820
-
directory: str # Directory name in Git store
3822
-
last_updated: datetime
3823
-
entry_count: int = 0
3825
-
def update_timestamp(self) -> None:
    """Update the last_updated timestamp to now."""
    # Record the moment of the most recent modification.
    self.last_updated = datetime.now()
3829
-
def increment_entry_count(self, count: int = 1) -> None:
    """Increment the entry count by the given amount."""
    self.entry_count = self.entry_count + count
    # Any count change also refreshes last_updated.
    self.update_timestamp()
3835
-
class GitStoreIndex(BaseModel):
3836
-
"""Index of all users and their directories in the Git store."""
3838
-
model_config = ConfigDict(
3839
-
json_encoders={datetime: lambda v: v.isoformat()}
3842
-
users: dict[str, UserMetadata] = {} # username -> UserMetadata
3844
-
last_updated: datetime
3845
-
total_entries: int = 0
3847
-
def add_user(self, user_metadata: UserMetadata) -> None:
    """Add or update a user in the index."""
    # Keyed by username; an existing entry is simply replaced.
    self.users[user_metadata.username] = user_metadata
    self.last_updated = datetime.now()
3852
-
def remove_user(self, username: str) -> bool:
    """Remove a user from the index. Returns True if user existed."""
    # Guard clause: nothing to do for an unknown username.
    if username not in self.users:
        return False
    del self.users[username]
    self.last_updated = datetime.now()
    return True
3860
-
def get_user(self, username: str) -> Optional[UserMetadata]:
    """Get user metadata by username, or None when unknown."""
    try:
        return self.users[username]
    except KeyError:
        return None
3864
-
def update_entry_count(self, username: str, count: int) -> None:
    """Update entry count for a user and total.

    Unknown usernames are ignored (no-op) instead of dereferencing the
    Optional returned by get_user(), so global totals stay consistent
    with per-user counts.
    """
    user = self.get_user(username)
    if user is None:
        # No such user: leave per-user and global totals untouched.
        return
    user.increment_entry_count(count)
    self.total_entries += count
    self.last_updated = datetime.now()
3872
-
def recalculate_totals(self) -> None:
    """Recalculate total entries from all users."""
    # Rebuild the aggregate from scratch rather than trusting increments.
    self.total_entries = sum(u.entry_count for u in self.users.values())
    self.last_updated = datetime.now()
3878
-
<file path="src/thicket/utils/__init__.py">
3879
-
"""Utility modules for thicket."""
3881
-
# This module will contain shared utilities
3882
-
# For now, it's empty but can be expanded with common functions
3885
-
<file path="src/thicket/__init__.py">
3886
-
"""Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories."""
3888
-
__version__ = "0.1.0"
3889
-
__author__ = "thicket"
3890
-
__email__ = "thicket@example.com"
3893
-
<file path="src/thicket/__main__.py">
3894
-
"""Entry point for running thicket as a module."""
3896
-
from .cli.main import app
3898
-
if __name__ == "__main__":
3902
-
<file path=".gitignore">
3903
-
# Byte-compiled / optimized / DLL files
3911
-
# Distribution / packaging
3925
-
share/python-wheels/
3932
-
# Usually these files are written by a python script from a template
3933
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
3939
-
pip-delete-this-directory.txt
3941
-
# Unit test / coverage reports
3964
-
db.sqlite3-journal
3973
-
# Sphinx documentation
3980
-
# Jupyter Notebook
3981
-
.ipynb_checkpoints
3988
-
# For a library or package, you might want to ignore these files since the code is
3989
-
# intended to run in multiple environments; otherwise, check them in:
3993
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
3994
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
3995
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
3996
-
# install all needed dependencies.
4000
-
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
4001
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
4002
-
# commonly ignored for libraries.
4006
-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
4007
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
4008
-
# commonly ignored for libraries.
4009
-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
4014
-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
4015
-
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
4016
-
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
4023
-
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
4025
-
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
4026
-
# in the .venv directory. It is recommended not to include this directory in version control.
4029
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
4033
-
celerybeat-schedule
4036
-
# SageMath parsed files
4049
-
# Spyder project settings
4053
-
# Rope project settings
4056
-
# mkdocs documentation
4064
-
# Pyre type checker
4067
-
# pytype static type analyzer
4070
-
# Cython debug symbols
4074
-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
4075
-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
4076
-
# and can be added to the global gitignore or merged into this file. For a more nuclear
4077
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
4081
-
# Abstra is an AI-powered process automation framework.
4082
-
# Ignore directories containing user credentials, local state, and settings.
4083
-
# Learn more at https://abstra.io/docs
4086
-
# Visual Studio Code
4087
-
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
4088
-
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
4089
-
# and can be added to the global gitignore or merged into this file. However, if you prefer,
4090
-
# you could uncomment the following to ignore the entire vscode folder
4096
-
# PyPI configuration file
4105
-
.streamlit/secrets.toml
4110
-
<file path="CLAUDE.md">
4111
-
My goal is to build a CLI tool called thicket in Python that maintains a Git repository within which Atom feeds can be persisted, including their contents.
4113
-
# Python Environment and Package Management
4115
-
This project uses `uv` for Python package management and virtual environment handling.
4117
-
## Running Commands
4119
-
ALWAYS use `uv run` to execute Python commands:
4121
-
- Run the CLI: `uv run -m thicket`
4122
-
- Run tests: `uv run pytest`
4123
-
- Type checking: `uv run mypy src/`
4124
-
- Linting: `uv run ruff check src/`
4125
-
- Format code: `uv run ruff format src/`
4126
-
- Compile check: `uv run python -m py_compile <file>`
4128
-
## Package Management
4130
-
- Add dependencies: `uv add <package>`
4131
-
- Add dev dependencies: `uv add --dev <package>`
4132
-
- Install dependencies: `uv sync`
4133
-
- Update dependencies: `uv lock --upgrade`
4135
-
# Project Structure
4137
-
The configuration file specifies:
4138
-
- the location of a git store
4139
-
- a list of usernames and target Atom/RSS feed(s) and optional metadata about the username such as their email, homepage, icon and display name
4140
-
- a cache directory to store temporary results such as feed downloads and their last modification date that speed up operations across runs of the tool
4142
-
The Git data store should:
4143
-
- have a subdirectory per user
4144
-
- within that directory, an entry per Atom entry indexed by the Atom id for that entry. The id should be sanitised consistently to be a safe filename. RSS feed should be normalized to Atom before storing it.
4145
-
- within each entry file, the metadata of the Atom feed converted into a JSON format that preserves as much metadata as possible.
4146
-
- have a JSON file in the Git repository that indexes the users, their associated directories within the Git repository, and any other metadata about that user from the config file
4147
-
The CLI should be modern and use cool progress bars and other niceties from ecosystem libraries.
4149
-
The intention behind the Git repository is that it can be queried by other websites in order to build a weblog structure of comments that link to other blogs.
4152
-
<file path="pyproject.toml">
4154
-
requires = ["hatchling"]
4155
-
build-backend = "hatchling.build"
4159
-
dynamic = ["version"]
4160
-
description = "A CLI tool for persisting Atom/RSS feeds in Git repositories"
4161
-
readme = "README.md"
4163
-
requires-python = ">=3.9"
4165
-
{name = "thicket", email = "thicket@example.com"},
4168
-
"Development Status :: 3 - Alpha",
4169
-
"Intended Audience :: Developers",
4170
-
"License :: OSI Approved :: MIT License",
4171
-
"Operating System :: OS Independent",
4172
-
"Programming Language :: Python :: 3",
4173
-
"Programming Language :: Python :: 3.9",
4174
-
"Programming Language :: Python :: 3.10",
4175
-
"Programming Language :: Python :: 3.11",
4176
-
"Programming Language :: Python :: 3.12",
4177
-
"Programming Language :: Python :: 3.13",
4178
-
"Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary",
4179
-
"Topic :: Software Development :: Version Control :: Git",
4180
-
"Topic :: Text Processing :: Markup :: XML",
4185
-
"GitPython>=3.1.40",
4186
-
"feedparser>=6.0.11",
4187
-
"pydantic>=2.11.0",
4188
-
"pydantic-settings>=2.10.0",
4190
-
"pendulum>=3.0.0",
4192
-
"platformdirs>=4.0.0",
4194
-
"email_validator",
4198
-
[project.optional-dependencies]
4201
-
"pytest-asyncio>=0.24.0",
4202
-
"pytest-cov>=6.0.0",
4206
-
"types-PyYAML>=6.0.0",
4210
-
Homepage = "https://github.com/example/thicket"
4211
-
Documentation = "https://github.com/example/thicket"
4212
-
Repository = "https://github.com/example/thicket"
4213
-
"Bug Tracker" = "https://github.com/example/thicket/issues"
4216
-
thicket = "thicket.cli.main:app"
4218
-
[tool.hatch.version]
4219
-
path = "src/thicket/__init__.py"
4221
-
[tool.hatch.build.targets.wheel]
4222
-
packages = ["src/thicket"]
4226
-
target-version = ['py39']
4227
-
include = '\.pyi?$'
4228
-
extend-exclude = '''
4243
-
target-version = "py39"
4248
-
"E", # pycodestyle errors
4249
-
"W", # pycodestyle warnings
4252
-
"B", # flake8-bugbear
4253
-
"C4", # flake8-comprehensions
4257
-
"E501", # line too long, handled by black
4258
-
"B008", # do not perform function calls in argument defaults
4259
-
"C901", # too complex
4262
-
[tool.ruff.lint.per-file-ignores]
4263
-
"__init__.py" = ["F401"]
4266
-
python_version = "3.9"
4267
-
check_untyped_defs = true
4268
-
disallow_any_generics = true
4269
-
disallow_incomplete_defs = true
4270
-
disallow_untyped_defs = true
4271
-
no_implicit_optional = true
4272
-
warn_redundant_casts = true
4273
-
warn_unused_ignores = true
4274
-
warn_return_any = true
4275
-
strict_optional = true
4277
-
[[tool.mypy.overrides]]
4283
-
ignore_missing_imports = true
4285
-
[tool.pytest.ini_options]
4286
-
testpaths = ["tests"]
4287
-
python_files = ["test_*.py"]
4288
-
python_classes = ["Test*"]
4289
-
python_functions = ["test_*"]
4292
-
"--strict-markers",
4293
-
"--strict-config",
4294
-
"--cov=src/thicket",
4295
-
"--cov-report=term-missing",
4296
-
"--cov-report=html",
4297
-
"--cov-report=xml",
4299
-
filterwarnings = [
4301
-
"ignore::UserWarning",
4302
-
"ignore::DeprecationWarning",
4305
-
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
4306
-
"integration: marks tests as integration tests",
4309
-
[tool.coverage.run]
4313
-
[tool.coverage.report]
4315
-
"pragma: no cover",
4318
-
"if settings.DEBUG",
4319
-
"raise AssertionError",
4320
-
"raise NotImplementedError",
4322
-
"if __name__ == .__main__.:",
4323
-
"class .*\\bProtocol\\):",
4324
-
"@(abc\\.)?abstractmethod",
4328
-
<file path="src/thicket/cli/commands/__init__.py">
4329
-
"""CLI commands for thicket."""
4331
-
# Import all commands to register them with the main app
4332
-
from . import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync
4334
-
__all__ = ["add", "duplicates", "generate", "index_cmd", "info_cmd", "init", "links_cmd", "list_cmd", "sync"]
4337
-
<file path="src/thicket/cli/commands/add.py">
4338
-
"""Add command for thicket."""
4341
-
from pathlib import Path
4342
-
from typing import Optional
4345
-
from pydantic import HttpUrl, ValidationError
4347
-
from ...core.feed_parser import FeedParser
4348
-
from ...core.git_store import GitStore
4349
-
from ..main import app
4350
-
from ..utils import (
4359
-
@app.command("add")
4361
-
subcommand: str = typer.Argument(..., help="Subcommand: 'user' or 'feed'"),
4362
-
username: str = typer.Argument(..., help="Username"),
4363
-
feed_url: Optional[str] = typer.Argument(None, help="Feed URL (required for 'user' command)"),
4364
-
email: Optional[str] = typer.Option(None, "--email", "-e", help="User email"),
4365
-
homepage: Optional[str] = typer.Option(None, "--homepage", "-h", help="User homepage"),
4366
-
icon: Optional[str] = typer.Option(None, "--icon", "-i", help="User icon URL"),
4367
-
display_name: Optional[str] = typer.Option(None, "--display-name", "-d", help="User display name"),
4368
-
config_file: Optional[Path] = typer.Option(
4369
-
Path("thicket.yaml"), "--config", help="Configuration file path"
4371
-
auto_discover: bool = typer.Option(
4372
-
True, "--auto-discover/--no-auto-discover", help="Auto-discover user metadata from feed"
4375
-
"""Add a user or feed to thicket."""
4377
-
if subcommand == "user":
4378
-
add_user(username, feed_url, email, homepage, icon, display_name, config_file, auto_discover)
4379
-
elif subcommand == "feed":
4380
-
add_feed(username, feed_url, config_file)
4382
-
print_error(f"Unknown subcommand: {subcommand}")
4383
-
print_error("Use 'user' or 'feed'")
4384
-
raise typer.Exit(1)
4389
-
feed_url: Optional[str],
4390
-
email: Optional[str],
4391
-
homepage: Optional[str],
4392
-
icon: Optional[str],
4393
-
display_name: Optional[str],
4394
-
config_file: Path,
4395
-
auto_discover: bool,
4397
-
"""Add a new user with feed."""
4400
-
print_error("Feed URL is required when adding a user")
4401
-
raise typer.Exit(1)
4403
-
# Validate feed URL
4405
-
validated_feed_url = HttpUrl(feed_url)
4406
-
except ValidationError:
4407
-
print_error(f"Invalid feed URL: {feed_url}")
4408
-
raise typer.Exit(1) from None
4410
-
# Load configuration
4411
-
config = load_config(config_file)
4413
-
# Initialize Git store
4414
-
git_store = GitStore(config.git_store)
4416
-
# Check if user already exists
4417
-
existing_user = git_store.get_user(username)
4419
-
print_error(f"User '{username}' already exists")
4420
-
print_error("Use 'thicket add feed' to add additional feeds")
4421
-
raise typer.Exit(1)
4423
-
# Auto-discover metadata if enabled
4424
-
discovered_metadata = None
4426
-
discovered_metadata = asyncio.run(discover_feed_metadata(validated_feed_url))
4428
-
# Prepare user data with manual overrides taking precedence
4429
-
user_display_name = display_name or (discovered_metadata.author_name or discovered_metadata.title if discovered_metadata else None)
4430
-
user_email = email or (discovered_metadata.author_email if discovered_metadata else None)
4431
-
user_homepage = homepage or (str(discovered_metadata.author_uri or discovered_metadata.link) if discovered_metadata else None)
4432
-
user_icon = icon or (str(discovered_metadata.logo or discovered_metadata.icon or discovered_metadata.image_url) if discovered_metadata else None)
4434
-
# Add user to Git store
4435
-
git_store.add_user(
4436
-
username=username,
4437
-
display_name=user_display_name,
4439
-
homepage=user_homepage,
4441
-
feeds=[str(validated_feed_url)],
4445
-
git_store.commit_changes(f"Add user: {username}")
4447
-
print_success(f"Added user '{username}' with feed: {feed_url}")
4449
-
if discovered_metadata and auto_discover:
4450
-
print_info("Auto-discovered metadata:")
4451
-
if user_display_name:
4452
-
print_info(f" Display name: {user_display_name}")
4454
-
print_info(f" Email: {user_email}")
4456
-
print_info(f" Homepage: {user_homepage}")
4458
-
print_info(f" Icon: {user_icon}")
4461
-
def add_feed(username: str, feed_url: Optional[str], config_file: Path) -> None:
4462
-
"""Add a feed to an existing user."""
4465
-
print_error("Feed URL is required")
4466
-
raise typer.Exit(1)
4468
-
# Validate feed URL
4470
-
validated_feed_url = HttpUrl(feed_url)
4471
-
except ValidationError:
4472
-
print_error(f"Invalid feed URL: {feed_url}")
4473
-
raise typer.Exit(1) from None
4475
-
# Load configuration
4476
-
config = load_config(config_file)
4478
-
# Initialize Git store
4479
-
git_store = GitStore(config.git_store)
4481
-
# Check if user exists
4482
-
user = git_store.get_user(username)
4484
-
print_error(f"User '{username}' not found")
4485
-
print_error("Use 'thicket add user' to add a new user")
4486
-
raise typer.Exit(1)
4488
-
# Check if feed already exists
4489
-
if str(validated_feed_url) in user.feeds:
4490
-
print_error(f"Feed already exists for user '{username}': {feed_url}")
4491
-
raise typer.Exit(1)
4493
-
# Add feed to user
4494
-
updated_feeds = user.feeds + [str(validated_feed_url)]
4495
-
if git_store.update_user(username, feeds=updated_feeds):
4496
-
git_store.commit_changes(f"Add feed to user {username}: {feed_url}")
4497
-
print_success(f"Added feed to user '{username}': {feed_url}")
4499
-
print_error(f"Failed to add feed to user '{username}'")
4500
-
raise typer.Exit(1)
4503
-
async def discover_feed_metadata(feed_url: HttpUrl):
4504
-
"""Discover metadata from a feed URL."""
4506
-
with create_progress() as progress:
4507
-
task = progress.add_task("Discovering feed metadata...", total=None)
4509
-
parser = FeedParser()
4510
-
content = await parser.fetch_feed(feed_url)
4511
-
metadata, _ = parser.parse_feed(content, feed_url)
4513
-
progress.update(task, completed=True)
4516
-
except Exception as e:
4517
-
print_error(f"Failed to discover feed metadata: {e}")
4521
-
<file path="src/thicket/cli/commands/duplicates.py">
4522
-
"""Duplicates command for thicket."""
4524
-
from pathlib import Path
4525
-
from typing import Optional
4528
-
from rich.table import Table
4530
-
from ...core.git_store import GitStore
4531
-
from ..main import app
4532
-
from ..utils import (
4542
-
@app.command("duplicates")
4543
-
def duplicates_command(
    action: str = typer.Argument(..., help="Action: 'list', 'add', 'remove'"),
    duplicate_id: Optional[str] = typer.Argument(None, help="Duplicate entry ID"),
    canonical_id: Optional[str] = typer.Argument(None, help="Canonical entry ID"),
    config_file: Optional[Path] = typer.Option(
        Path("thicket.yaml"), "--config", help="Configuration file path"
    ),
) -> None:
    """Manage duplicate entry mappings."""
    # Load configuration and open the Git store before dispatching.
    config = load_config(config_file)
    git_store = GitStore(config.git_store)

    # Dispatch on the requested action.
    if action == "list":
        list_duplicates(git_store)
    elif action == "add":
        add_duplicate(git_store, duplicate_id, canonical_id)
    elif action == "remove":
        remove_duplicate(git_store, duplicate_id)
    else:
        print_error(f"Unknown action: {action}")
        print_error("Use 'list', 'add', or 'remove'")
        raise typer.Exit(1)
4571
-
def list_duplicates(git_store: GitStore) -> None:
    """List all duplicate mappings."""
    duplicates = git_store.get_duplicates()

    # Empty store: report and stop, honoring TSV mode.
    if not duplicates.duplicates:
        if get_tsv_mode():
            print("No duplicate mappings found")
        else:
            print_info("No duplicate mappings found")
        return

    if get_tsv_mode():
        # Machine-readable tab-separated output.
        print("Duplicate ID\tCanonical ID")
        for duplicate_id, canonical_id in duplicates.duplicates.items():
            print(f"{duplicate_id}\t{canonical_id}")
        print(f"Total duplicates: {len(duplicates.duplicates)}")
        return

    # Human-readable rich table.
    table = Table(title="Duplicate Entry Mappings")
    table.add_column("Duplicate ID", style="red")
    table.add_column("Canonical ID", style="green")
    for duplicate_id, canonical_id in duplicates.duplicates.items():
        table.add_row(duplicate_id, canonical_id)
    console.print(table)
    print_info(f"Total duplicates: {len(duplicates.duplicates)}")
4599
-
def add_duplicate(git_store: GitStore, duplicate_id: Optional[str], canonical_id: Optional[str]) -> None:
    """Add a duplicate mapping.

    Validates both IDs, rejects remapping an already-mapped duplicate
    and self-mappings, then persists and commits the new mapping.
    """
    if not duplicate_id:
        print_error("Duplicate ID is required")
        raise typer.Exit(1)
    if not canonical_id:
        print_error("Canonical ID is required")
        raise typer.Exit(1)

    # Refuse to silently re-point an existing mapping.
    duplicates = git_store.get_duplicates()
    if duplicates.is_duplicate(duplicate_id):
        existing_canonical = duplicates.get_canonical(duplicate_id)
        print_error(f"Duplicate ID already mapped to: {existing_canonical}")
        print_error("Use 'remove' first to change the mapping")
        raise typer.Exit(1)

    # A mapping from an ID to itself would be meaningless.
    if duplicate_id == canonical_id:
        print_error("Duplicate ID cannot be the same as canonical ID")
        raise typer.Exit(1)

    git_store.add_duplicate(duplicate_id, canonical_id)
    git_store.commit_changes(f"Add duplicate mapping: {duplicate_id} -> {canonical_id}")
    print_success(f"Added duplicate mapping: {duplicate_id} -> {canonical_id}")
4631
-
def remove_duplicate(git_store: GitStore, duplicate_id: Optional[str]) -> None:
    """Remove a duplicate mapping."""
    if not duplicate_id:
        print_error("Duplicate ID is required")
        raise typer.Exit(1)

    duplicates = git_store.get_duplicates()
    if not duplicates.is_duplicate(duplicate_id):
        print_error(f"No duplicate mapping found for: {duplicate_id}")
        raise typer.Exit(1)

    # Resolve the canonical side first so the commit message can name it.
    canonical_id = duplicates.get_canonical(duplicate_id)

    if git_store.remove_duplicate(duplicate_id):
        git_store.commit_changes(f"Remove duplicate mapping: {duplicate_id} -> {canonical_id}")
        print_success(f"Removed duplicate mapping: {duplicate_id} -> {canonical_id}")
    else:
        print_error(f"Failed to remove duplicate mapping: {duplicate_id}")
        raise typer.Exit(1)
4655
-
<file path="src/thicket/cli/commands/sync.py">
4656
-
"""Sync command for thicket."""
4659
-
from pathlib import Path
4660
-
from typing import Optional
4663
-
from rich.progress import track
4665
-
from ...core.feed_parser import FeedParser
4666
-
from ...core.git_store import GitStore
4667
-
from ..main import app
4668
-
from ..utils import (
4678
-
all_users: bool = typer.Option(
4679
-
False, "--all", "-a", help="Sync all users and feeds"
4681
-
user: Optional[str] = typer.Option(
4682
-
None, "--user", "-u", help="Sync specific user only"
4684
-
config_file: Optional[Path] = typer.Option(
4685
-
Path("thicket.yaml"), "--config", help="Configuration file path"
4687
-
dry_run: bool = typer.Option(
4688
-
False, "--dry-run", help="Show what would be synced without making changes"
4691
-
"""Sync feeds and store entries in Git repository."""
4693
-
# Load configuration
4694
-
config = load_config(config_file)
4696
-
# Initialize Git store
4697
-
git_store = GitStore(config.git_store)
4699
-
# Determine which users to sync from git repository
4700
-
users_to_sync = []
4702
-
index = git_store._load_index()
4703
-
users_to_sync = list(index.users.values())
4705
-
user_metadata = git_store.get_user(user)
4706
-
if not user_metadata:
4707
-
print_error(f"User '{user}' not found in git repository")
4708
-
raise typer.Exit(1)
4709
-
users_to_sync = [user_metadata]
4711
-
print_error("Specify --all to sync all users or --user to sync a specific user")
4712
-
raise typer.Exit(1)
4714
-
if not users_to_sync:
4715
-
print_info("No users configured to sync")
4719
-
total_new_entries = 0
4720
-
total_updated_entries = 0
4722
-
for user_metadata in users_to_sync:
4723
-
print_info(f"Syncing user: {user_metadata.username}")
4725
-
user_new_entries = 0
4726
-
user_updated_entries = 0
4728
-
# Sync each feed for the user
4729
-
for feed_url in track(user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"):
4731
-
new_entries, updated_entries = asyncio.run(
4732
-
sync_feed(git_store, user_metadata.username, feed_url, dry_run)
4734
-
user_new_entries += new_entries
4735
-
user_updated_entries += updated_entries
4737
-
except Exception as e:
4738
-
print_error(f"Failed to sync feed {feed_url}: {e}")
4741
-
print_info(f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated")
4742
-
total_new_entries += user_new_entries
4743
-
total_updated_entries += user_updated_entries
4745
-
# Commit changes if not dry run
4746
-
if not dry_run and (total_new_entries > 0 or total_updated_entries > 0):
4747
-
commit_message = f"Sync feeds: {total_new_entries} new entries, {total_updated_entries} updated"
4748
-
git_store.commit_changes(commit_message)
4749
-
print_success(f"Committed changes: {commit_message}")
4753
-
print_info(f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated")
4755
-
print_success(f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated")
4758
-
async def sync_feed(git_store: GitStore, username: str, feed_url, dry_run: bool) -> tuple[int, int]:
4759
-
"""Sync a single feed for a user."""
4761
-
parser = FeedParser()
4764
-
# Fetch and parse feed
4765
-
content = await parser.fetch_feed(feed_url)
4766
-
metadata, entries = parser.parse_feed(content, feed_url)
4769
-
updated_entries = 0
4771
-
# Process each entry
4772
-
for entry in entries:
4774
-
# Check if entry already exists
4775
-
existing_entry = git_store.get_entry(username, entry.id)
4777
-
if existing_entry:
4778
-
# Check if entry has been updated
4779
-
if existing_entry.updated != entry.updated:
4781
-
git_store.store_entry(username, entry)
4782
-
updated_entries += 1
4786
-
git_store.store_entry(username, entry)
4789
-
except Exception as e:
4790
-
print_error(f"Failed to process entry {entry.id}: {e}")
4793
-
return new_entries, updated_entries
4795
-
except Exception as e:
4796
-
print_error(f"Failed to sync feed {feed_url}: {e}")
4800
-
<file path="src/thicket/models/config.py">
4801
-
"""Configuration models for thicket."""
4803
-
from pathlib import Path
4804
-
from typing import Optional
4806
-
from pydantic import BaseModel, EmailStr, HttpUrl
4807
-
from pydantic_settings import BaseSettings, SettingsConfigDict
4810
-
class UserConfig(BaseModel):
4811
-
"""Configuration for a single user and their feeds."""
4814
-
feeds: list[HttpUrl]
4815
-
email: Optional[EmailStr] = None
4816
-
homepage: Optional[HttpUrl] = None
4817
-
icon: Optional[HttpUrl] = None
4818
-
display_name: Optional[str] = None
4821
-
class ThicketConfig(BaseSettings):
4822
-
"""Main configuration for thicket."""
4824
-
model_config = SettingsConfigDict(
4825
-
env_prefix="THICKET_",
4827
-
yaml_file="thicket.yaml",
4828
-
case_sensitive=False,
4833
-
users: list[UserConfig] = []
4836
-
<file path="src/thicket/cli/commands/links_cmd.py">
4837
-
"""CLI command for extracting and categorizing all outbound links from blog entries."""
4841
-
from pathlib import Path
4842
-
from typing import Dict, List, Optional, Set
4843
-
from urllib.parse import urljoin, urlparse
4846
-
from rich.console import Console
4847
-
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
4848
-
from rich.table import Table
4850
-
from ...core.git_store import GitStore
4851
-
from ..main import app
4852
-
from ..utils import load_config, get_tsv_mode
4854
-
console = Console()
4858
-
"""Represents a link found in a blog entry."""
4860
-
def __init__(self, url: str, entry_id: str, username: str):
4862
-
self.entry_id = entry_id
4863
-
self.username = username
4865
-
def to_dict(self) -> dict:
4866
-
"""Convert to dictionary for JSON serialization."""
4869
-
"entry_id": self.entry_id,
4870
-
"username": self.username
4874
-
def from_dict(cls, data: dict) -> "LinkData":
4875
-
"""Create from dictionary."""
4878
-
entry_id=data["entry_id"],
4879
-
username=data["username"]
4883
-
class LinkCategorizer:
    """Categorizes links as internal, user, or unknown."""

    def __init__(self, user_domains: Dict[str, Set[str]]):
        self.user_domains = user_domains
        # Reverse index (domain -> username) for O(1) cross-user lookups.
        self.domain_to_user = {}
        for username, domains in user_domains.items():
            for domain in domains:
                self.domain_to_user[domain] = username

    def categorize_url(self, url: str, source_username: str) -> tuple[str, Optional[str]]:
        """
        Categorize a URL as 'internal', 'user', or 'unknown'.
        Returns (category, target_username).
        """
        try:
            domain = urlparse(url).netloc.lower()

            # Link back to the source user's own domain.
            if domain in self.user_domains.get(source_username, set()):
                return "internal", source_username

            # Link to another tracked user's domain.
            if domain in self.domain_to_user:
                return "user", self.domain_to_user[domain]

            # Anything else is external.
            return "unknown", None
        except Exception:
            # Malformed URLs are categorized as unknown rather than failing.
            return "unknown", None
4918
-
class LinkExtractor:
4919
-
"""Extracts and resolves links from blog entries."""
4921
-
def __init__(self):
4922
-
# Pattern for extracting links from HTML
4923
-
self.link_pattern = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL)
4924
-
self.url_pattern = re.compile(r'https?://[^\s<>"]+')
4926
-
def extract_links_from_html(self, html_content: str, base_url: str) -> List[tuple[str, str]]:
4927
-
"""Extract all links from HTML content and resolve them against base URL."""
4930
-
# Extract links from <a> tags
4931
-
for match in self.link_pattern.finditer(html_content):
4932
-
url = match.group(1)
4933
-
text = re.sub(r'<[^>]+>', '', match.group(2)).strip() # Remove HTML tags from link text
4935
-
# Resolve relative URLs against base URL
4936
-
resolved_url = urljoin(base_url, url)
4937
-
links.append((resolved_url, text))
4942
-
def extract_links_from_entry(self, entry, username: str, base_url: str) -> List[LinkData]:
4943
-
"""Extract all links from a blog entry."""
4946
-
# Combine all text content for analysis
4947
-
content_to_search = []
4949
-
content_to_search.append(entry.content)
4951
-
content_to_search.append(entry.summary)
4953
-
for content in content_to_search:
4954
-
extracted_links = self.extract_links_from_html(content, base_url)
4956
-
for url, link_text in extracted_links:
4958
-
if not url or url.startswith('#'):
4961
-
link_data = LinkData(
4963
-
entry_id=entry.id,
4967
-
links.append(link_data)
4974
-
config_file: Optional[Path] = typer.Option(
4975
-
Path("thicket.yaml"),
4978
-
help="Path to configuration file",
4980
-
output_file: Optional[Path] = typer.Option(
4984
-
help="Path to output unified links file (default: links.json in git store)",
4986
-
verbose: bool = typer.Option(
4990
-
help="Show detailed progress information",
4993
-
"""Extract and categorize all outbound links from blog entries.
4995
-
This command analyzes all blog entries to extract outbound links,
4996
-
resolve them properly with respect to the feed's base URL, and
4997
-
categorize them as internal, user, or unknown links.
4999
-
Creates a unified links.json file containing all link data.
5002
-
# Load configuration
5003
-
config = load_config(config_file)
5005
-
# Initialize Git store
5006
-
git_store = GitStore(config.git_store)
5008
-
# Build user domain mapping
5010
-
console.print("Building user domain mapping...")
5012
-
index = git_store._load_index()
5015
-
for username, user_metadata in index.users.items():
5018
-
# Add domains from feeds
5019
-
for feed_url in user_metadata.feeds:
5020
-
domain = urlparse(feed_url).netloc.lower()
5022
-
domains.add(domain)
5024
-
# Add domain from homepage
5025
-
if user_metadata.homepage:
5026
-
domain = urlparse(str(user_metadata.homepage)).netloc.lower()
5028
-
domains.add(domain)
5030
-
user_domains[username] = domains
5033
-
console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")
5035
-
# Initialize components
5036
-
link_extractor = LinkExtractor()
5037
-
categorizer = LinkCategorizer(user_domains)
5040
-
users = list(index.users.keys())
5043
-
console.print("[yellow]No users found in Git store[/yellow]")
5044
-
raise typer.Exit(0)
5046
-
# Process all entries
5048
-
link_categories = {"internal": [], "user": [], "unknown": []}
5049
-
link_dict = {} # Dictionary with link URL as key, maps to list of atom IDs
5050
-
reverse_dict = {} # Dictionary with atom ID as key, maps to list of URLs
5054
-
TextColumn("[progress.description]{task.description}"),
5056
-
TaskProgressColumn(),
5060
-
# Count total entries first
5061
-
counting_task = progress.add_task("Counting entries...", total=len(users))
5064
-
for username in users:
5065
-
entries = git_store.list_entries(username)
5066
-
total_entries += len(entries)
5067
-
progress.advance(counting_task)
5069
-
progress.remove_task(counting_task)
5072
-
processing_task = progress.add_task(
5073
-
f"Processing {total_entries} entries...",
5074
-
total=total_entries
5077
-
for username in users:
5078
-
entries = git_store.list_entries(username)
5079
-
user_metadata = index.users[username]
5081
-
# Get base URL for this user (use first feed URL)
5082
-
base_url = str(user_metadata.feeds[0]) if user_metadata.feeds else "https://example.com"
5084
-
for entry in entries:
5085
-
# Extract links from this entry
5086
-
entry_links = link_extractor.extract_links_from_entry(entry, username, base_url)
5088
-
# Track unique links per entry
5089
-
entry_urls_seen = set()
5091
-
# Categorize each link
5092
-
for link_data in entry_links:
5093
-
# Skip if we've already seen this URL in this entry
5094
-
if link_data.url in entry_urls_seen:
5096
-
entry_urls_seen.add(link_data.url)
5098
-
category, target_username = categorizer.categorize_url(link_data.url, username)
5100
-
# Add to link dictionary (URL as key, maps to list of atom IDs)
5101
-
if link_data.url not in link_dict:
5102
-
link_dict[link_data.url] = []
5103
-
if link_data.entry_id not in link_dict[link_data.url]:
5104
-
link_dict[link_data.url].append(link_data.entry_id)
5106
-
# Also add to reverse mapping (atom ID -> list of URLs)
5107
-
if link_data.entry_id not in reverse_dict:
5108
-
reverse_dict[link_data.entry_id] = []
5109
-
if link_data.url not in reverse_dict[link_data.entry_id]:
5110
-
reverse_dict[link_data.entry_id].append(link_data.url)
5112
-
# Add category info to link data for categories tracking
5113
-
link_info = link_data.to_dict()
5114
-
link_info["category"] = category
5115
-
link_info["target_username"] = target_username
5117
-
all_links.append(link_info)
5118
-
link_categories[category].append(link_info)
5120
-
progress.advance(processing_task)
5122
-
if verbose and entry_links:
5123
-
console.print(f" Found {len(entry_links)} links in {username}:{entry.title[:50]}...")
5125
-
# Determine output path
5127
-
output_path = output_file
5129
-
output_path = config.git_store / "links.json"
5131
-
# Save all extracted links (not just filtered ones)
5133
-
console.print("Preparing output data...")
5135
-
# Build a set of all URLs that correspond to posts in the git database
5136
-
registered_urls = set()
5138
-
# Get all entries from all users and build URL mappings
5139
-
for username in users:
5140
-
entries = git_store.list_entries(username)
5141
-
user_metadata = index.users[username]
5143
-
for entry in entries:
5144
-
# Try to match entry URLs with extracted links
5145
-
if hasattr(entry, 'link') and entry.link:
5146
-
registered_urls.add(str(entry.link))
5148
-
# Also check entry alternate links if they exist
5149
-
if hasattr(entry, 'links') and entry.links:
5150
-
for link in entry.links:
5151
-
if hasattr(link, 'href') and link.href:
5152
-
registered_urls.add(str(link.href))
5154
-
# Build unified structure with metadata
5155
-
unified_links = {}
5156
-
reverse_mapping = {}
5158
-
for url, entry_ids in link_dict.items():
5159
-
unified_links[url] = {
5160
-
"referencing_entries": entry_ids
5163
-
# Find target username if this is a tracked post
5164
-
if url in registered_urls:
5165
-
for username in users:
5166
-
user_domains_set = {domain for domain in user_domains.get(username, [])}
5167
-
if any(domain in url for domain in user_domains_set):
5168
-
unified_links[url]["target_username"] = username
5171
-
# Build reverse mapping
5172
-
for entry_id in entry_ids:
5173
-
if entry_id not in reverse_mapping:
5174
-
reverse_mapping[entry_id] = []
5175
-
if url not in reverse_mapping[entry_id]:
5176
-
reverse_mapping[entry_id].append(url)
5178
-
# Create unified output data
5180
-
"links": unified_links,
5181
-
"reverse_mapping": reverse_mapping,
5182
-
"user_domains": {k: list(v) for k, v in user_domains.items()}
5186
-
console.print(f"Found {len(registered_urls)} registered post URLs")
5187
-
console.print(f"Found {len(link_dict)} total links, {sum(1 for link in unified_links.values() if 'target_username' in link)} tracked posts")
5189
-
# Save unified data
5190
-
with open(output_path, "w") as f:
5191
-
json.dump(output_data, f, indent=2, default=str)
5194
-
if not get_tsv_mode():
5195
-
console.print("\n[green]โ Links extraction completed successfully[/green]")
5197
-
# Create summary table or TSV output
5198
-
if get_tsv_mode():
5199
-
print("Category\tCount\tDescription")
5200
-
print(f"Internal\t{len(link_categories['internal'])}\tLinks to same user's domain")
5201
-
print(f"User\t{len(link_categories['user'])}\tLinks to other tracked users")
5202
-
print(f"Unknown\t{len(link_categories['unknown'])}\tLinks to external sites")
5203
-
print(f"Total Extracted\t{len(all_links)}\tAll extracted links")
5204
-
print(f"Saved to Output\t{len(output_data['links'])}\tLinks saved to output file")
5205
-
print(f"Cross-references\t{sum(1 for link in unified_links.values() if 'target_username' in link)}\tLinks to registered posts only")
5207
-
table = Table(title="Links Summary")
5208
-
table.add_column("Category", style="cyan")
5209
-
table.add_column("Count", style="green")
5210
-
table.add_column("Description", style="white")
5212
-
table.add_row("Internal", str(len(link_categories["internal"])), "Links to same user's domain")
5213
-
table.add_row("User", str(len(link_categories["user"])), "Links to other tracked users")
5214
-
table.add_row("Unknown", str(len(link_categories["unknown"])), "Links to external sites")
5215
-
table.add_row("Total Extracted", str(len(all_links)), "All extracted links")
5216
-
table.add_row("Saved to Output", str(len(output_data['links'])), "Links saved to output file")
5217
-
table.add_row("Cross-references", str(sum(1 for link in unified_links.values() if 'target_username' in link)), "Links to registered posts only")
5219
-
console.print(table)
5221
-
# Show user links if verbose
5222
-
if verbose and link_categories["user"]:
5223
-
if get_tsv_mode():
5224
-
print("User Link Source\tUser Link Target\tLink Count")
5225
-
user_link_counts = {}
5227
-
for link in link_categories["user"]:
5228
-
key = f"{link['username']} -> {link['target_username']}"
5229
-
user_link_counts[key] = user_link_counts.get(key, 0) + 1
5231
-
for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
5232
-
source, target = link_pair.split(" -> ")
5233
-
print(f"{source}\t{target}\t{count}")
5235
-
console.print("\n[bold]User-to-user links:[/bold]")
5236
-
user_link_counts = {}
5238
-
for link in link_categories["user"]:
5239
-
key = f"{link['username']} -> {link['target_username']}"
5240
-
user_link_counts[key] = user_link_counts.get(key, 0) + 1
5242
-
for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
5243
-
console.print(f" {link_pair}: {count} links")
5245
-
if not get_tsv_mode():
5246
-
console.print(f"\nUnified links data saved to: {output_path}")
5248
-
except Exception as e:
5249
-
console.print(f"[red]Error extracting links: {e}[/red]")
5251
-
console.print_exception()
5252
-
raise typer.Exit(1)
5255
-
<file path="src/thicket/cli/commands/list_cmd.py">
5256
-
"""List command for thicket."""
5259
-
from pathlib import Path
5260
-
from typing import Optional
5263
-
from rich.table import Table
5265
-
from ...core.git_store import GitStore
5266
-
from ..main import app
5267
-
from ..utils import (
5271
-
print_feeds_table,
5272
-
print_feeds_table_from_git,
5274
-
print_users_table,
5275
-
print_users_table_from_git,
5276
-
print_entries_tsv,
5281
-
@app.command("list")
5283
-
what: str = typer.Argument(..., help="What to list: 'users', 'feeds', 'entries'"),
5284
-
user: Optional[str] = typer.Option(
5285
-
None, "--user", "-u", help="Filter by specific user"
5287
-
limit: Optional[int] = typer.Option(
5288
-
None, "--limit", "-l", help="Limit number of results"
5290
-
config_file: Optional[Path] = typer.Option(
5291
-
Path("thicket.yaml"), "--config", help="Configuration file path"
5294
-
"""List users, feeds, or entries."""
5296
-
# Load configuration
5297
-
config = load_config(config_file)
5299
-
# Initialize Git store
5300
-
git_store = GitStore(config.git_store)
5302
-
if what == "users":
5303
-
list_users(git_store)
5304
-
elif what == "feeds":
5305
-
list_feeds(git_store, user)
5306
-
elif what == "entries":
5307
-
list_entries(git_store, user, limit)
5309
-
print_error(f"Unknown list type: {what}")
5310
-
print_error("Use 'users', 'feeds', or 'entries'")
5311
-
raise typer.Exit(1)
5314
-
def list_users(git_store: GitStore) -> None:
5315
-
"""List all users."""
5316
-
index = git_store._load_index()
5317
-
users = list(index.users.values())
5320
-
print_info("No users configured")
5323
-
print_users_table_from_git(users)
5326
-
def list_feeds(git_store: GitStore, username: Optional[str] = None) -> None:
5327
-
"""List feeds, optionally filtered by user."""
5329
-
user = git_store.get_user(username)
5331
-
print_error(f"User '{username}' not found")
5332
-
raise typer.Exit(1)
5334
-
if not user.feeds:
5335
-
print_info(f"No feeds configured for user '{username}'")
5338
-
print_feeds_table_from_git(git_store, username)
5341
-
def list_entries(git_store: GitStore, username: Optional[str] = None, limit: Optional[int] = None) -> None:
5342
-
"""List entries, optionally filtered by user."""
5345
-
# List entries for specific user
5346
-
user = git_store.get_user(username)
5348
-
print_error(f"User '{username}' not found")
5349
-
raise typer.Exit(1)
5351
-
entries = git_store.list_entries(username, limit)
5353
-
print_info(f"No entries found for user '{username}'")
5356
-
print_entries_table([entries], [username])
5359
-
# List entries for all users
5361
-
all_usernames = []
5363
-
index = git_store._load_index()
5364
-
for user in index.users.values():
5365
-
entries = git_store.list_entries(user.username, limit)
5367
-
all_entries.append(entries)
5368
-
all_usernames.append(user.username)
5370
-
if not all_entries:
5371
-
print_info("No entries found")
5374
-
print_entries_table(all_entries, all_usernames)
5377
-
def _clean_html_content(content: Optional[str]) -> str:
5378
-
"""Clean HTML content for display in table."""
5382
-
# Remove HTML tags
5383
-
clean_text = re.sub(r'<[^>]+>', ' ', content)
5384
-
# Replace multiple whitespace with single space
5385
-
clean_text = re.sub(r'\s+', ' ', clean_text)
5386
-
# Strip and limit length
5387
-
clean_text = clean_text.strip()
5388
-
if len(clean_text) > 100:
5389
-
clean_text = clean_text[:97] + "..."
5394
-
def print_entries_table(entries_by_user: list[list], usernames: list[str]) -> None:
5395
-
"""Print a table of entries."""
5396
-
if get_tsv_mode():
5397
-
print_entries_tsv(entries_by_user, usernames)
5400
-
table = Table(title="Feed Entries")
5401
-
table.add_column("User", style="cyan", no_wrap=True)
5402
-
table.add_column("Title", style="bold")
5403
-
table.add_column("Updated", style="blue")
5404
-
table.add_column("URL", style="green")
5406
-
# Combine all entries with usernames
5408
-
for entries, username in zip(entries_by_user, usernames):
5409
-
for entry in entries:
5410
-
all_entries.append((username, entry))
5412
-
# Sort by updated time (newest first)
5413
-
all_entries.sort(key=lambda x: x[1].updated, reverse=True)
5415
-
for username, entry in all_entries:
5416
-
# Format updated time
5417
-
updated_str = entry.updated.strftime("%Y-%m-%d %H:%M")
5419
-
# Truncate title if too long
5420
-
title = entry.title
5421
-
if len(title) > 50:
5422
-
title = title[:47] + "..."
5431
-
console.print(table)
5434
-
<file path="src/thicket/cli/main.py">
5435
-
"""Main CLI application using Typer."""
5438
-
from rich.console import Console
5440
-
from .. import __version__
5442
-
app = typer.Typer(
5444
-
help="A CLI tool for persisting Atom/RSS feeds in Git repositories",
5445
-
no_args_is_help=True,
5446
-
rich_markup_mode="rich",
5449
-
console = Console()
5451
-
# Global state for TSV output mode
5455
-
def version_callback(value: bool) -> None:
    """Show version and exit."""
    # Typer invokes this eagerly; only act when --version was passed.
    if value:
        console.print(f"thicket version {__version__}")
        raise typer.Exit()
5464
-
version: bool = typer.Option(
5468
-
help="Show the version and exit",
5469
-
callback=version_callback,
5472
-
tsv: bool = typer.Option(
5475
-
help="Output in tab-separated values format without truncation",
5478
-
"""Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories."""
5483
-
# Import commands to register them
5484
-
from .commands import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync
5486
-
if __name__ == "__main__":
5490
-
<file path="src/thicket/core/git_store.py">
5491
-
"""Git repository operations for thicket."""
5494
-
from datetime import datetime
5495
-
from pathlib import Path
5496
-
from typing import Optional
5499
-
from git import Repo
5501
-
from ..models import AtomEntry, DuplicateMap, GitStoreIndex, UserMetadata
5505
-
"""Manages the Git repository for storing feed entries."""
5507
-
def __init__(self, repo_path: Path):
5508
-
"""Initialize the Git store."""
5509
-
self.repo_path = repo_path
5510
-
self.repo: Optional[Repo] = None
5511
-
self._ensure_repo()
5513
-
def _ensure_repo(self) -> None:
    """Ensure the Git repository exists and is initialized."""
    if not self.repo_path.exists():
        self.repo_path.mkdir(parents=True, exist_ok=True)

    try:
        # Open the repository if one is already present at repo_path.
        self.repo = Repo(self.repo_path)
    except git.InvalidGitRepositoryError:
        # Fresh directory: initialize and seed the store layout.
        self.repo = Repo.init(self.repo_path)
        self._create_initial_structure()
5525
-
def _create_initial_structure(self) -> None:
5526
-
"""Create initial Git store structure."""
5527
-
# Create index.json
5528
-
index = GitStoreIndex(
5529
-
created=datetime.now(),
5530
-
last_updated=datetime.now(),
5532
-
self._save_index(index)
5534
-
# Create duplicates.json
5535
-
duplicates = DuplicateMap()
5536
-
self._save_duplicates(duplicates)
5538
-
# Create initial commit
5539
-
self.repo.index.add(["index.json", "duplicates.json"])
5540
-
self.repo.index.commit("Initial thicket repository structure")
5542
-
def _save_index(self, index: GitStoreIndex) -> None:
5543
-
"""Save the index to index.json."""
5544
-
index_path = self.repo_path / "index.json"
5545
-
with open(index_path, "w") as f:
5546
-
json.dump(index.model_dump(mode="json", exclude_none=True), f, indent=2, default=str)
5548
-
def _load_index(self) -> GitStoreIndex:
    """Load the index from index.json."""
    index_path = self.repo_path / "index.json"
    # No index on disk yet: return a fresh, empty index object.
    if not index_path.exists():
        return GitStoreIndex(
            created=datetime.now(),
            last_updated=datetime.now(),
        )

    with open(index_path) as f:
        data = json.load(f)
    return GitStoreIndex(**data)
5562
-
def _save_duplicates(self, duplicates: DuplicateMap) -> None:
5563
-
"""Save duplicates map to duplicates.json."""
5564
-
duplicates_path = self.repo_path / "duplicates.json"
5565
-
with open(duplicates_path, "w") as f:
5566
-
json.dump(duplicates.model_dump(exclude_none=True), f, indent=2)
5568
-
def _load_duplicates(self) -> DuplicateMap:
5569
-
"""Load duplicates map from duplicates.json."""
5570
-
duplicates_path = self.repo_path / "duplicates.json"
5571
-
if not duplicates_path.exists():
5572
-
return DuplicateMap()
5574
-
with open(duplicates_path) as f:
5575
-
data = json.load(f)
5577
-
return DuplicateMap(**data)
5579
-
def add_user(self, username: str, display_name: Optional[str] = None,
5580
-
email: Optional[str] = None, homepage: Optional[str] = None,
5581
-
icon: Optional[str] = None, feeds: Optional[list[str]] = None) -> UserMetadata:
5582
-
"""Add a new user to the Git store."""
5583
-
index = self._load_index()
5585
-
# Create user directory
5586
-
user_dir = self.repo_path / username
5587
-
user_dir.mkdir(exist_ok=True)
5589
-
# Create user metadata
5590
-
user_metadata = UserMetadata(
5591
-
username=username,
5592
-
display_name=display_name,
5594
-
homepage=homepage,
5596
-
feeds=feeds or [],
5597
-
directory=username,
5598
-
created=datetime.now(),
5599
-
last_updated=datetime.now(),
5604
-
index.add_user(user_metadata)
5605
-
self._save_index(index)
5607
-
return user_metadata
5609
-
def get_user(self, username: str) -> Optional[UserMetadata]:
    """Get user metadata by username.

    Re-reads the on-disk index on every call, so the result reflects
    the latest saved state rather than a cached copy.
    """
    index = self._load_index()
    return index.get_user(username)
5614
-
def update_user(self, username: str, **kwargs) -> bool:
5615
-
"""Update user metadata."""
5616
-
index = self._load_index()
5617
-
user = index.get_user(username)
5622
-
# Update user metadata
5623
-
for key, value in kwargs.items():
5624
-
if hasattr(user, key) and value is not None:
5625
-
setattr(user, key, value)
5627
-
user.update_timestamp()
5631
-
index.add_user(user)
5632
-
self._save_index(index)
5636
-
def store_entry(self, username: str, entry: AtomEntry) -> bool:
5637
-
"""Store an entry in the user's directory."""
5638
-
user = self.get_user(username)
5642
-
# Sanitize entry ID for filename
5643
-
from .feed_parser import FeedParser
5644
-
parser = FeedParser()
5645
-
safe_id = parser.sanitize_entry_id(entry.id)
5647
-
# Create entry file
5648
-
user_dir = self.repo_path / user.directory
5649
-
entry_path = user_dir / f"{safe_id}.json"
5651
-
# Check if entry already exists
5652
-
entry_exists = entry_path.exists()
5655
-
with open(entry_path, "w") as f:
5656
-
json.dump(entry.model_dump(mode="json", exclude_none=True), f, indent=2, default=str)
5658
-
# Update user metadata if new entry
5659
-
if not entry_exists:
5660
-
index = self._load_index()
5661
-
index.update_entry_count(username, 1)
5662
-
self._save_index(index)
5666
-
def get_entry(self, username: str, entry_id: str) -> Optional[AtomEntry]:
5667
-
"""Get an entry by username and entry ID."""
5668
-
user = self.get_user(username)
5672
-
# Sanitize entry ID
5673
-
from .feed_parser import FeedParser
5674
-
parser = FeedParser()
5675
-
safe_id = parser.sanitize_entry_id(entry_id)
5677
-
entry_path = self.repo_path / user.directory / f"{safe_id}.json"
5678
-
if not entry_path.exists():
5681
-
with open(entry_path) as f:
5682
-
data = json.load(f)
5684
-
return AtomEntry(**data)
5686
-
def list_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]:
5687
-
"""List entries for a user."""
5688
-
user = self.get_user(username)
5692
-
user_dir = self.repo_path / user.directory
5693
-
if not user_dir.exists():
5697
-
entry_files = sorted(user_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
5701
-
entry_files = entry_files[:limit]
5703
-
for entry_file in entry_files:
5705
-
with open(entry_file) as f:
5706
-
data = json.load(f)
5707
-
entries.append(AtomEntry(**data))
5709
-
# Skip invalid entries
5714
-
def get_duplicates(self) -> DuplicateMap:
5715
-
"""Get the duplicates map."""
5716
-
return self._load_duplicates()
5718
-
def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None:
5719
-
"""Add a duplicate mapping."""
5720
-
duplicates = self._load_duplicates()
5721
-
duplicates.add_duplicate(duplicate_id, canonical_id)
5722
-
self._save_duplicates(duplicates)
5724
-
def remove_duplicate(self, duplicate_id: str) -> bool:
5725
-
"""Remove a duplicate mapping."""
5726
-
duplicates = self._load_duplicates()
5727
-
result = duplicates.remove_duplicate(duplicate_id)
5728
-
self._save_duplicates(duplicates)
5731
-
def commit_changes(self, message: str) -> None:
5732
-
"""Commit all changes to the Git repository."""
5737
-
self.repo.git.add(A=True)
5739
-
# Check if there are changes to commit
5740
-
if self.repo.index.diff("HEAD"):
5741
-
self.repo.index.commit(message)
5743
-
def get_stats(self) -> dict:
5744
-
"""Get statistics about the Git store."""
5745
-
index = self._load_index()
5746
-
duplicates = self._load_duplicates()
5749
-
"total_users": len(index.users),
5750
-
"total_entries": index.total_entries,
5751
-
"total_duplicates": len(duplicates.duplicates),
5752
-
"last_updated": index.last_updated,
5753
-
"repository_size": sum(f.stat().st_size for f in self.repo_path.rglob("*") if f.is_file()),
5756
-
def search_entries(self, query: str, username: Optional[str] = None,
5757
-
limit: Optional[int] = None) -> list[tuple[str, AtomEntry]]:
5758
-
"""Search entries by content."""
5761
-
# Get users to search
5762
-
index = self._load_index()
5763
-
users = [index.get_user(username)] if username else list(index.users.values())
5764
-
users = [u for u in users if u is not None]
5766
-
for user in users:
5767
-
user_dir = self.repo_path / user.directory
5768
-
if not user_dir.exists():
5771
-
entry_files = user_dir.glob("*.json")
5773
-
for entry_file in entry_files:
5775
-
with open(entry_file) as f:
5776
-
data = json.load(f)
5778
-
entry = AtomEntry(**data)
5780
-
# Simple text search in title, summary, and content
5781
-
searchable_text = " ".join(filter(None, [
5783
-
entry.summary or "",
5784
-
entry.content or "",
5787
-
if query.lower() in searchable_text:
5788
-
results.append((user.username, entry))
5790
-
if limit and len(results) >= limit:
5794
-
# Skip invalid entries
5797
-
# Sort by updated time (newest first)
5798
-
results.sort(key=lambda x: x[1].updated, reverse=True)
5800
-
return results[:limit] if limit else results
5803
-
<file path="ARCH.md">
5804
-
# Thicket Architecture Design
5807
-
Thicket is a modern CLI tool for persisting Atom/RSS feeds in a Git repository, designed to enable distributed webblog comment structures.
5809
-
## Technology Stack
5811
-
### Core Libraries
5813
-
#### CLI Framework
5814
-
- **Typer** (0.15.x) - Modern CLI framework with type hints
5815
-
- **Rich** (13.x) - Beautiful terminal output, progress bars, and tables
5816
-
- **prompt-toolkit** - Interactive prompts when needed
5818
-
#### Feed Processing
5819
-
- **feedparser** (6.0.11) - Universal feed parser supporting RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0
5820
-
- Alternative: **atoma** for stricter Atom/RSS parsing with JSON feed support
5821
-
- Alternative: **fastfeedparser** for high-performance parsing (10x faster)
5823
-
#### Git Integration
5824
-
- **GitPython** (3.1.44) - High-level git operations, requires git CLI
5825
-
- Alternative: **pygit2** (1.18.0) - Direct libgit2 bindings, better for authentication
5828
-
- **httpx** (0.28.x) - Modern async/sync HTTP client with connection pooling
5829
-
- **aiohttp** (3.11.x) - For async-only operations if needed
5831
-
#### Configuration & Data Models
5832
-
- **pydantic** (2.11.x) - Data validation and settings management
5833
-
- **pydantic-settings** (2.10.x) - Configuration file handling with env var support
5836
-
- **pendulum** (3.x) - Better datetime handling
5837
-
- **bleach** (6.x) - HTML sanitization for feed content
5838
-
- **platformdirs** (4.x) - Cross-platform directory paths
5840
-
## Project Structure
5844
-
โโโ pyproject.toml # Modern Python packaging
5845
-
โโโ README.md # Project documentation
5846
-
โโโ ARCH.md # This file
5847
-
โโโ CLAUDE.md # Project instructions
5848
-
โโโ .gitignore
5850
-
โ โโโ thicket/
5851
-
โ โโโ __init__.py
5852
-
โ โโโ __main__.py # Entry point for `python -m thicket`
5853
-
โ โโโ cli/ # CLI commands and interface
5854
-
โ โ โโโ __init__.py
5855
-
โ โ โโโ main.py # Main CLI app with Typer
5856
-
โ โ โโโ commands/ # Subcommands
5857
-
โ โ โ โโโ __init__.py
5858
-
โ โ โ โโโ init.py # Initialize git store
5859
-
โ โ โ โโโ add.py # Add users and feeds
5860
-
โ โ โ โโโ sync.py # Sync feeds
5861
-
โ โ โ โโโ list_cmd.py # List users/feeds
5862
-
โ โ โ โโโ duplicates.py # Manage duplicate entries
5863
-
โ โ โ โโโ links_cmd.py # Extract and categorize links
5864
-
โ โ โ โโโ index_cmd.py # Build reference index and show threads
5865
-
โ โ โโโ utils.py # CLI utilities (progress, formatting)
5866
-
โ โโโ core/ # Core business logic
5867
-
โ โ โโโ __init__.py
5868
-
โ โ โโโ feed_parser.py # Feed parsing and normalization
5869
-
โ โ โโโ git_store.py # Git repository operations
5870
-
โ โ โโโ reference_parser.py # Link extraction and threading
5871
-
โ โโโ models/ # Pydantic data models
5872
-
โ โ โโโ __init__.py
5873
-
โ โ โโโ config.py # Configuration models
5874
-
โ โ โโโ feed.py # Feed/Entry models
5875
-
โ โ โโโ user.py # User metadata models
5876
-
โ โโโ utils/ # Shared utilities
5877
-
โ โโโ __init__.py
5879
-
โ โโโ __init__.py
5880
-
โ โโโ conftest.py # pytest configuration
5881
-
โ โโโ test_feed_parser.py
5882
-
โ โโโ test_git_store.py
5883
-
โ โโโ fixtures/ # Test data
5884
-
โ โโโ feeds/
5886
-
โโโ examples/ # Example configurations
5891
-
### Configuration File (YAML/TOML)
5893
-
class ThicketConfig(BaseSettings):
5894
-
git_store: Path # Git repository location
5895
-
cache_dir: Path # Cache directory
5896
-
users: list[UserConfig]
5898
-
model_config = SettingsConfigDict(
5899
-
env_prefix="THICKET_",
5901
-
yaml_file="thicket.yaml"
5904
-
class UserConfig(BaseModel):
5906
-
feeds: list[HttpUrl]
5907
-
email: Optional[EmailStr] = None
5908
-
homepage: Optional[HttpUrl] = None
5909
-
icon: Optional[HttpUrl] = None
5910
-
display_name: Optional[str] = None
5913
-
### Feed Storage Format
5915
-
class AtomEntry(BaseModel):
5916
-
id: str # Original Atom ID
5920
-
published: Optional[datetime]
5921
-
summary: Optional[str]
5922
-
content: Optional[str] # Full body content from Atom entry
5923
-
content_type: Optional[str] = "html" # text, html, xhtml
5924
-
author: Optional[dict]
5925
-
categories: list[str] = []
5926
-
rights: Optional[str] = None # Copyright info
5927
-
source: Optional[str] = None # Source feed URL
5928
-
# Additional Atom fields preserved during RSS->Atom conversion
5930
-
model_config = ConfigDict(
5932
-
datetime: lambda v: v.isoformat()
5936
-
class DuplicateMap(BaseModel):
5937
-
"""Maps duplicate entry IDs to canonical entry IDs"""
5938
-
duplicates: dict[str, str] = {} # duplicate_id -> canonical_id
5939
-
comment: str = "Entry IDs that map to the same canonical content"
5941
-
def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None:
5942
-
"""Add a duplicate mapping"""
5943
-
self.duplicates[duplicate_id] = canonical_id
5945
-
def remove_duplicate(self, duplicate_id: str) -> bool:
5946
-
"""Remove a duplicate mapping. Returns True if existed."""
5947
-
return self.duplicates.pop(duplicate_id, None) is not None
5949
-
def get_canonical(self, entry_id: str) -> str:
5950
-
"""Get canonical ID for an entry (returns original if not duplicate)"""
5951
-
return self.duplicates.get(entry_id, entry_id)
5953
-
def is_duplicate(self, entry_id: str) -> bool:
5954
-
"""Check if entry ID is marked as duplicate"""
5955
-
return entry_id in self.duplicates
5958
-
## Git Repository Structure
5961
-
โโโ index.json # User directory index
5962
-
โโโ duplicates.json # Manual curation of duplicate entries
5963
-
โโโ links.json # Unified links, references, and mapping data
5965
-
โ โโโ entry_id_1.json # Sanitized entry files
5966
-
โ โโโ entry_id_2.json
5972
-
## Key Design Decisions
5974
-
### 1. Feed Normalization & Auto-Discovery
5975
-
- All RSS feeds converted to Atom format before storage
5976
-
- Preserves maximum metadata during conversion
5977
-
- Sanitizes HTML content to prevent XSS
5978
-
- **Auto-discovery**: Extracts user metadata from feed during `add user` command
5980
-
### 2. ID Sanitization
5981
-
- Consistent algorithm to convert Atom IDs to safe filenames
5982
-
- Handles edge cases (very long IDs, special characters)
5983
-
- Maintains reversibility where possible
5985
-
### 3. Git Operations
5986
-
- Uses GitPython for simplicity (no authentication required)
5987
-
- Single main branch for all users and entries
5988
-
- Atomic commits per sync operation
5989
-
- Meaningful commit messages with feed update summaries
5990
-
- Preserves complete history - never delete entries even if they disappear from feeds
5992
-
### 4. Caching Strategy
5993
-
- HTTP caching with Last-Modified/ETag support
5994
-
- Local cache of parsed feeds with TTL
5995
-
- Cache invalidation on configuration changes
5996
-
- Git store serves as permanent historical archive beyond feed depth limits
5998
-
### 5. Error Handling
5999
-
- Graceful handling of feed parsing errors
6000
-
- Retry logic for network failures
6001
-
- Clear error messages with recovery suggestions
6003
-
## CLI Command Structure
6006
-
# Initialize a new git store
6007
-
thicket init /path/to/store
6009
-
# Add a user with feeds (auto-discovers metadata from feed)
6010
-
thicket add user "alyssa" \
6011
-
--feed "https://example.com/feed.atom"
6012
-
# Auto-populates: email, homepage, icon, display_name from feed metadata
6014
-
# Add a user with manual overrides
6015
-
thicket add user "alyssa" \
6016
-
--feed "https://example.com/feed.atom" \
6017
-
--email "alyssa@example.com" \
6018
-
--homepage "https://alyssa.example.com" \
6019
-
--icon "https://example.com/avatar.png" \
6020
-
--display-name "Alyssa P. Hacker"
6022
-
# Add additional feed to existing user
6023
-
thicket add feed "alyssa" "https://example.com/other-feed.rss"
6025
-
# Sync all feeds (designed for cron usage)
6026
-
thicket sync --all
6028
-
# Sync specific user
6029
-
thicket sync --user alyssa
6031
-
# List users and their feeds
6032
-
thicket list users
6033
-
thicket list feeds --user alyssa
6035
-
# Manage duplicate entries
6036
-
thicket duplicates list
6037
-
thicket duplicates add <entry_id_1> <entry_id_2> # Mark as duplicates
6038
-
thicket duplicates remove <entry_id_1> <entry_id_2> # Unmark duplicates
6040
-
# Link processing and threading
6041
-
thicket links --verbose # Extract and categorize all links
6042
-
thicket index --verbose # Build reference index for threading
6043
-
thicket threads # Show conversation threads
6044
-
thicket threads --username user1 # Show threads for specific user
6045
-
thicket threads --min-size 3 # Show threads with minimum size
6048
-
## Performance Considerations
6050
-
1. **Concurrent Feed Fetching**: Use httpx with asyncio for parallel downloads
6051
-
2. **Incremental Updates**: Only fetch/parse feeds that have changed
6052
-
3. **Efficient Git Operations**: Batch commits, use shallow clones where appropriate
6053
-
4. **Progress Feedback**: Rich progress bars for long operations
6055
-
## Security Considerations
6057
-
1. **HTML Sanitization**: Use bleach to clean feed content
6058
-
2. **URL Validation**: Strict validation of feed URLs
6059
-
3. **Git Security**: No credentials stored in repository
6060
-
4. **Path Traversal**: Careful sanitization of filenames
6062
-
## Future Enhancements
6064
-
1. **Web Interface**: Optional web UI for browsing the git store
6065
-
2. **Webhooks**: Notify external services on feed updates
6066
-
3. **Feed Discovery**: Auto-discover feeds from HTML pages
6067
-
4. **Export Formats**: Generate static sites, OPML exports
6068
-
5. **Federation**: P2P sync between thicket instances
6070
-
## Requirements Clarification
6072
-
**โ Resolved Requirements:**
6073
-
1. **Feed Update Frequency**: Designed for cron usage - no built-in scheduling needed
6074
-
2. **Duplicate Handling**: Manual curation via `duplicates.json` file with CLI commands
6075
-
3. **Git Branching**: Single main branch for all users and entries
6076
-
4. **Authentication**: No feeds require authentication currently
6077
-
5. **Content Storage**: Store complete Atom entry body content as provided
6078
-
6. **Deleted Entries**: Preserve all entries in Git store permanently (historical archive)
6079
-
7. **History Depth**: Git store maintains full history beyond feed depth limits
6080
-
8. **Feed Auto-Discovery**: Extract user metadata from feed during `add user` command
6082
-
## Duplicate Entry Management
6084
-
### Duplicate Detection Strategy
6085
-
- **Manual Curation**: Duplicates identified and managed manually via CLI
6086
-
- **Storage**: `duplicates.json` file in Git root maps entry IDs to canonical entries
6087
-
- **Structure**: `{"duplicate_id": "canonical_id", ...}`
6088
-
- **CLI Commands**: Add/remove duplicate mappings with validation
6089
-
- **Query Resolution**: Search/list commands resolve duplicates to canonical entries
6091
-
### Duplicate File Format
6094
-
"https://example.com/feed/entry/123": "https://canonical.com/posts/same-post",
6095
-
"https://mirror.com/articles/456": "https://canonical.com/posts/same-post",
6096
-
"comment": "Entry IDs that map to the same canonical content"
6100
-
## Feed Metadata Auto-Discovery
6102
-
### Extraction Strategy
6103
-
When adding a new user with `thicket add user`, the system fetches and parses the feed to extract:
6105
-
- **Display Name**: From `feed.title` or `feed.author.name`
6106
-
- **Email**: From `feed.author.email` or `feed.managingEditor`
6107
-
- **Homepage**: From `feed.link` or `feed.author.uri`
6108
-
- **Icon**: From `feed.logo`, `feed.icon`, or `feed.image.url`
6110
-
### Discovery Priority Order
6111
-
1. **Author Information**: Prefer `feed.author.*` fields (more specific to person)
6112
-
2. **Feed-Level**: Fall back to feed-level metadata
6113
-
3. **Manual Override**: CLI flags always take precedence over discovered values
6114
-
4. **Update Behavior**: Auto-discovery only runs during initial `add user`, not on sync
6116
-
### Extracted Metadata Format
6118
-
class FeedMetadata(BaseModel):
6119
-
title: Optional[str] = None
6120
-
author_name: Optional[str] = None
6121
-
author_email: Optional[EmailStr] = None
6122
-
author_uri: Optional[HttpUrl] = None
6123
-
link: Optional[HttpUrl] = None
6124
-
logo: Optional[HttpUrl] = None
6125
-
icon: Optional[HttpUrl] = None
6126
-
image_url: Optional[HttpUrl] = None
6128
-
def to_user_config(self, username: str, feed_url: HttpUrl) -> UserConfig:
6129
-
"""Convert discovered metadata to UserConfig with fallbacks"""
6130
-
return UserConfig(
6131
-
username=username,
6133
-
display_name=self.author_name or self.title,
6134
-
email=self.author_email,
6135
-
homepage=self.author_uri or self.link,
6136
-
icon=self.logo or self.icon or self.image_url
6140
-
## Link Processing and Threading Architecture
6143
-
The thicket system implements a sophisticated link processing and threading system to create email-style threaded views of blog entries by tracking cross-references between different blogs.
6145
-
### Link Processing Pipeline
6147
-
#### 1. Link Extraction (`thicket links`)
6148
-
The `links` command systematically extracts all outbound links from blog entries and categorizes them:
6151
-
class LinkData(BaseModel):
6152
-
url: str # Fully resolved URL
6153
-
entry_id: str # Source entry ID
6154
-
username: str # Source username
6155
-
context: str # Surrounding text context
6156
-
category: str # "internal", "user", or "unknown"
6157
-
target_username: Optional[str] # Target user if applicable
6160
-
**Link Categories:**
6161
-
- **Internal**: Links to the same user's domain (self-references)
6162
-
- **User**: Links to other tracked users' domains
6163
-
- **Unknown**: Links to external sites not tracked by thicket
6165
-
#### 2. URL Resolution
6166
-
All links are properly resolved using the Atom feed's base URL to handle:
6167
-
- Relative URLs (converted to absolute)
6168
-
- Protocol-relative URLs
6169
-
- Fragment identifiers
6170
-
- Redirects and canonical URLs
6172
-
#### 3. Domain Mapping
6173
-
The system builds a comprehensive domain mapping from user configuration:
6174
-
- Feed URLs โ domain extraction
6175
-
- Homepage URLs โ domain extraction
6176
-
- Reverse mapping: domain โ username
6178
-
### Threading System
6180
-
#### 1. Reference Index Generation (`thicket index`)
6181
-
Creates a bidirectional reference index from the categorized links:
6184
-
class BlogReference(BaseModel):
6185
-
source_entry_id: str
6186
-
source_username: str
6188
-
target_username: Optional[str]
6189
-
target_entry_id: Optional[str]
6193
-
#### 2. Thread Detection Algorithm
6194
-
Uses graph traversal to find connected blog entries:
6195
-
- **Outbound references**: Links from an entry to other entries
6196
-
- **Inbound references**: Links to an entry from other entries
6197
-
- **Thread members**: All entries connected through references
6199
-
#### 3. Threading Display (`thicket threads`)
6200
-
Creates email-style threaded views:
6201
-
- Chronological ordering within threads
6202
-
- Reference counts (outbound/inbound)
6203
-
- Context preservation
6204
-
- Filtering options (user, entry, minimum size)
6206
-
### Data Structures
6208
-
#### links.json Format (Unified Structure)
6212
-
"https://example.com/post/123": {
6213
-
"referencing_entries": ["https://blog.user.com/entry/456"],
6214
-
"target_username": "user2"
6216
-
"https://external-site.com/article": {
6217
-
"referencing_entries": ["https://blog.user.com/entry/789"]
6220
-
"reverse_mapping": {
6221
-
"https://blog.user.com/entry/456": ["https://example.com/post/123"],
6222
-
"https://blog.user.com/entry/789": ["https://external-site.com/article"]
6226
-
"source_entry_id": "https://blog.user.com/entry/456",
6227
-
"source_username": "user1",
6228
-
"target_url": "https://example.com/post/123",
6229
-
"target_username": "user2",
6230
-
"target_entry_id": "https://example.com/post/123",
6231
-
"context": "As mentioned in this post..."
6235
-
"user1": ["blog.user.com"],
6236
-
"user2": ["example.com"]
6241
-
This unified structure eliminates duplication by:
6242
-
- Storing each URL only once with minimal metadata
6243
-
- Including all link data, reference data, and mappings in one file
6244
-
- Using presence of `target_username` to identify tracked vs external links
6245
-
- Providing bidirectional mappings for efficient queries
6247
-
### Unified Structure Benefits
6249
-
- **Eliminates Duplication**: Each URL appears only once with metadata
6250
-
- **Single Source of Truth**: All link-related data in one file
6251
-
- **Efficient Queries**: Fast lookups for both directions (URLโentries, entryโURLs)
6252
-
- **Atomic Updates**: All link data changes together
6253
-
- **Reduced I/O**: Fewer file operations
6255
-
### Implementation Benefits
6257
-
1. **Systematic Link Processing**: All links are extracted and categorized consistently
6258
-
2. **Proper URL Resolution**: Handles relative URLs and base URL resolution correctly
6259
-
3. **Domain-based Categorization**: Automatically identifies user-to-user references
6260
-
4. **Bidirectional Indexing**: Supports both "who links to whom" and "who is linked by whom"
6261
-
5. **Thread Discovery**: Finds conversation threads automatically
6262
-
6. **Rich Context**: Preserves surrounding text for each link
6263
-
7. **Performance**: Pre-computed indexes for fast threading queries
6268
-
# Extract and categorize all links
6269
-
thicket links --verbose
6271
-
# Build reference index for threading
6272
-
thicket index --verbose
6274
-
# Show all conversation threads
6277
-
# Show threads for specific user
6278
-
thicket threads --username user1
6280
-
# Show threads with minimum size
6281
-
thicket threads --min-size 3
6284
-
### Integration with Existing Commands
6286
-
The link processing system integrates seamlessly with existing thicket commands:
6287
-
- `thicket sync` updates entries, requiring `thicket links` to be run afterward
6288
-
- `thicket index` uses the output from `thicket links` for improved accuracy
6289
-
- `thicket threads` provides the user-facing threading interface
6291
-
## Current Implementation Status
6293
-
### โ
Completed Features
6294
-
1. **Core Infrastructure**
6295
-
- Modern CLI with Typer and Rich
6296
-
- Pydantic data models for type safety
6297
-
- Git repository operations with GitPython
6298
-
- Feed parsing and normalization with feedparser
6300
-
2. **User and Feed Management**
6301
-
- `thicket init` - Initialize git store
6302
-
- `thicket add` - Add users and feeds with auto-discovery
6303
-
- `thicket sync` - Sync feeds with progress tracking
6304
-
- `thicket list` - List users, feeds, and entries
6305
-
- `thicket duplicates` - Manage duplicate entries
6307
-
3. **Link Processing and Threading**
6308
-
- `thicket links` - Extract and categorize all outbound links
6309
-
- `thicket index` - Build reference index from links
6310
-
- `thicket threads` - Display threaded conversation views
6311
-
- Proper URL resolution with base URL handling
6312
-
- Domain-based link categorization
6313
-
- Context preservation for links
6315
-
### ๐ System Performance
6316
-
- **Link Extraction**: Successfully processes thousands of blog entries
6317
-
- **Categorization**: Identifies internal, user, and unknown links
6318
-
- **Threading**: Creates email-style threaded views of conversations
6319
-
- **Storage**: Efficient JSON-based data structures for links and references
6321
-
### ๐ง Current Architecture Highlights
6322
-
- **Modular Design**: Clear separation between CLI, core logic, and models
6323
-
- **Type Safety**: Comprehensive Pydantic models for data validation
6324
-
- **Rich CLI**: Beautiful progress bars, tables, and error handling
6325
-
- **Extensible**: Easy to add new commands and features
6326
-
- **Git Integration**: All data stored in version-controlled JSON files
6328
-
### ๐ฏ Proven Functionality
6329
-
The system has been tested with real blog data and successfully:
6330
-
- Extracted 14,396 total links from blog entries
6331
-
- Categorized 3,994 internal links, 363 user-to-user links, and 10,039 unknown links
6332
-
- Built comprehensive domain mappings for 16 users across 20 domains
6333
-
- Generated threaded views showing blog conversation patterns
6335
-
### ๐ Ready for Use
6336
-
The thicket system is now fully functional for:
6337
-
- Maintaining Git repositories of blog feeds
6338
-
- Tracking cross-references between blogs
6339
-
- Creating threaded views of blog conversations
6340
-
- Discovering blog interaction patterns
6341
-
- Building distributed comment systems
6344
-
<file path="src/thicket/cli/utils.py">
6345
-
"""CLI utilities and helpers."""
6347
-
from pathlib import Path
6348
-
from typing import Optional
6351
-
from rich.console import Console
6352
-
from rich.progress import Progress, SpinnerColumn, TextColumn
6353
-
from rich.table import Table
6355
-
from ..models import ThicketConfig, UserMetadata
6356
-
from ..core.git_store import GitStore
6358
-
console = Console()
6361
-
def get_tsv_mode() -> bool:
6362
-
"""Get the global TSV mode setting."""
6363
-
from .main import tsv_mode
6367
-
def load_config(config_path: Optional[Path] = None) -> ThicketConfig:
6368
-
"""Load thicket configuration from file or environment."""
6369
-
if config_path and config_path.exists():
6372
-
with open(config_path) as f:
6373
-
config_data = yaml.safe_load(f)
6375
-
# Convert to ThicketConfig
6376
-
return ThicketConfig(**config_data)
6378
-
# Try to load from default locations or environment
6380
-
# First try to find thicket.yaml in current directory
6381
-
default_config = Path("thicket.yaml")
6382
-
if default_config.exists():
6384
-
with open(default_config) as f:
6385
-
config_data = yaml.safe_load(f)
6386
-
return ThicketConfig(**config_data)
6388
-
# Fall back to environment variables
6389
-
return ThicketConfig()
6390
-
except Exception as e:
6391
-
console.print(f"[red]Error loading configuration: {e}[/red]")
6392
-
console.print("[yellow]Run 'thicket init' to create a new configuration.[/yellow]")
6393
-
raise typer.Exit(1) from e
6396
-
def save_config(config: ThicketConfig, config_path: Path) -> None:
6397
-
"""Save thicket configuration to file."""
6400
-
config_data = config.model_dump(mode="json", exclude_none=True)
6402
-
# Convert Path objects to strings for YAML serialization
6403
-
config_data["git_store"] = str(config_data["git_store"])
6404
-
config_data["cache_dir"] = str(config_data["cache_dir"])
6406
-
with open(config_path, "w") as f:
6407
-
yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
6410
-
def create_progress() -> Progress:
6411
-
"""Create a Rich progress display."""
6414
-
TextColumn("[progress.description]{task.description}"),
6420
-
def print_users_table(config: ThicketConfig) -> None:
6421
-
"""Print a table of users and their feeds."""
6422
-
if get_tsv_mode():
6423
-
print_users_tsv(config)
6426
-
table = Table(title="Users and Feeds")
6427
-
table.add_column("Username", style="cyan", no_wrap=True)
6428
-
table.add_column("Display Name", style="magenta")
6429
-
table.add_column("Email", style="blue")
6430
-
table.add_column("Homepage", style="green")
6431
-
table.add_column("Feeds", style="yellow")
6433
-
for user in config.users:
6434
-
feeds_str = "\n".join(str(feed) for feed in user.feeds)
6437
-
user.display_name or "",
6439
-
str(user.homepage) if user.homepage else "",
6443
-
console.print(table)
6446
-
def print_feeds_table(config: ThicketConfig, username: Optional[str] = None) -> None:
6447
-
"""Print a table of feeds, optionally filtered by username."""
6448
-
if get_tsv_mode():
6449
-
print_feeds_tsv(config, username)
6452
-
table = Table(title=f"Feeds{f' for {username}' if username else ''}")
6453
-
table.add_column("Username", style="cyan", no_wrap=True)
6454
-
table.add_column("Feed URL", style="blue")
6455
-
table.add_column("Status", style="green")
6457
-
users = [config.find_user(username)] if username else config.users
6458
-
users = [u for u in users if u is not None]
6460
-
for user in users:
6461
-
for feed in user.feeds:
6465
-
"Active", # TODO: Add actual status checking
6468
-
console.print(table)
6471
-
def confirm_action(message: str, default: bool = False) -> bool:
6472
-
"""Prompt for confirmation."""
6473
-
return typer.confirm(message, default=default)
6476
-
def print_success(message: str) -> None:
6477
-
"""Print a success message."""
6478
-
console.print(f"[green]โ[/green] {message}")
6481
-
def print_error(message: str) -> None:
6482
-
"""Print an error message."""
6483
-
console.print(f"[red]โ[/red] {message}")
6486
-
def print_warning(message: str) -> None:
6487
-
"""Print a warning message."""
6488
-
console.print(f"[yellow]โ [/yellow] {message}")
6491
-
def print_info(message: str) -> None:
6492
-
"""Print an info message."""
6493
-
console.print(f"[blue]โน[/blue] {message}")
6496
-
def print_users_table_from_git(users: list[UserMetadata]) -> None:
6497
-
"""Print a table of users from git repository."""
6498
-
if get_tsv_mode():
6499
-
print_users_tsv_from_git(users)
6502
-
table = Table(title="Users and Feeds")
6503
-
table.add_column("Username", style="cyan", no_wrap=True)
6504
-
table.add_column("Display Name", style="magenta")
6505
-
table.add_column("Email", style="blue")
6506
-
table.add_column("Homepage", style="green")
6507
-
table.add_column("Feeds", style="yellow")
6509
-
for user in users:
6510
-
feeds_str = "\n".join(user.feeds)
6513
-
user.display_name or "",
6515
-
user.homepage or "",
6519
-
console.print(table)
6522
-
def print_feeds_table_from_git(git_store: GitStore, username: Optional[str] = None) -> None:
6523
-
"""Print a table of feeds from git repository."""
6524
-
if get_tsv_mode():
6525
-
print_feeds_tsv_from_git(git_store, username)
6528
-
table = Table(title=f"Feeds{f' for {username}' if username else ''}")
6529
-
table.add_column("Username", style="cyan", no_wrap=True)
6530
-
table.add_column("Feed URL", style="blue")
6531
-
table.add_column("Status", style="green")
6534
-
user = git_store.get_user(username)
6535
-
users = [user] if user else []
6537
-
index = git_store._load_index()
6538
-
users = list(index.users.values())
6540
-
for user in users:
6541
-
for feed in user.feeds:
6545
-
"Active", # TODO: Add actual status checking
6548
-
console.print(table)
6551
-
def print_users_tsv(config: ThicketConfig) -> None:
6552
-
"""Print users in TSV format."""
6553
-
print("Username\tDisplay Name\tEmail\tHomepage\tFeeds")
6554
-
for user in config.users:
6555
-
feeds_str = ",".join(str(feed) for feed in user.feeds)
6556
-
print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}")
6559
-
def print_users_tsv_from_git(users: list[UserMetadata]) -> None:
6560
-
"""Print users from git repository in TSV format."""
6561
-
print("Username\tDisplay Name\tEmail\tHomepage\tFeeds")
6562
-
for user in users:
6563
-
feeds_str = ",".join(user.feeds)
6564
-
print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}")
6567
-
def print_feeds_tsv(config: ThicketConfig, username: Optional[str] = None) -> None:
6568
-
"""Print feeds in TSV format."""
6569
-
print("Username\tFeed URL\tStatus")
6570
-
users = [config.find_user(username)] if username else config.users
6571
-
users = [u for u in users if u is not None]
6573
-
for user in users:
6574
-
for feed in user.feeds:
6575
-
print(f"{user.username}\t{feed}\tActive")
6578
-
def print_feeds_tsv_from_git(git_store: GitStore, username: Optional[str] = None) -> None:
6579
-
"""Print feeds from git repository in TSV format."""
6580
-
print("Username\tFeed URL\tStatus")
6583
-
user = git_store.get_user(username)
6584
-
users = [user] if user else []
6586
-
index = git_store._load_index()
6587
-
users = list(index.users.values())
6589
-
for user in users:
6590
-
for feed in user.feeds:
6591
-
print(f"{user.username}\t{feed}\tActive")
6594
-
def print_entries_tsv(entries_by_user: list[list], usernames: list[str]) -> None:
6595
-
"""Print entries in TSV format."""
6596
-
print("User\tAtom ID\tTitle\tUpdated\tURL")
6598
-
# Combine all entries with usernames
6600
-
for entries, username in zip(entries_by_user, usernames):
6601
-
for entry in entries:
6602
-
all_entries.append((username, entry))
6604
-
# Sort by updated time (newest first)
6605
-
all_entries.sort(key=lambda x: x[1].updated, reverse=True)
6607
-
for username, entry in all_entries:
6608
-
# Format updated time
6609
-
updated_str = entry.updated.strftime("%Y-%m-%d %H:%M")
6611
-
# Escape tabs and newlines in title to preserve TSV format
6612
-
title = entry.title.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
6614
-
print(f"{username}\t{entry.id}\t{title}\t{updated_str}\t{entry.link}")