Manage Atom feeds in a persistent git repository
at old-main 221 kB view raw
1This file is a merged representation of the entire codebase, combined into a single document by Repomix. 2 3<file_summary> 4This section contains a summary of this file. 5 6<purpose> 7This file contains a packed representation of the entire repository's contents. 8It is designed to be easily consumable by AI systems for analysis, code review, 9or other automated processes. 10</purpose> 11 12<file_format> 13The content is organized as follows: 141. This summary section 152. Repository information 163. Directory structure 174. Repository files (if enabled) 185. Multiple file entries, each consisting of: 19 - File path as an attribute 20 - Full contents of the file 21</file_format> 22 23<usage_guidelines> 24- This file should be treated as read-only. Any changes should be made to the 25 original repository files, not this packed version. 26- When processing this file, use the file path to distinguish 27 between different files in the repository. 28- Be aware that this file may contain sensitive information. Handle it with 29 the same level of security as you would the original repository. 30</usage_guidelines> 31 32<notes> 33- Some files may have been excluded based on .gitignore rules and Repomix's configuration 34- Binary files are not included in this packed representation. 
Please refer to the Repository Structure section for a complete list of file paths, including binary files 35- Files matching patterns in .gitignore are excluded 36- Files matching default ignore patterns are excluded 37- Files are sorted by Git change count (files with more changes are at the bottom) 38</notes> 39 40</file_summary> 41 42<directory_structure> 43.claude/ 44 settings.local.json 45src/ 46 thicket/ 47 cli/ 48 commands/ 49 __init__.py 50 add.py 51 duplicates.py 52 generate.py 53 index_cmd.py 54 info_cmd.py 55 init.py 56 links_cmd.py 57 list_cmd.py 58 sync.py 59 __init__.py 60 main.py 61 utils.py 62 core/ 63 __init__.py 64 feed_parser.py 65 git_store.py 66 reference_parser.py 67 models/ 68 __init__.py 69 config.py 70 feed.py 71 user.py 72 templates/ 73 base.html 74 index.html 75 links.html 76 script.js 77 style.css 78 timeline.html 79 users.html 80 utils/ 81 __init__.py 82 __init__.py 83 __main__.py 84.gitignore 85ARCH.md 86CLAUDE.md 87pyproject.toml 88README.md 89</directory_structure> 90 91<files> 92This section contains the contents of the repository's files. 
93 94<file path=".claude/settings.local.json"> 95{ 96 "permissions": { 97 "allow": [ 98 "Bash(find:*)", 99 "Bash(uv run:*)", 100 "Bash(grep:*)", 101 "Bash(jq:*)", 102 "Bash(git add:*)", 103 "Bash(ls:*)" 104 ] 105 }, 106 "enableAllProjectMcpServers": false 107} 108</file> 109 110<file path="src/thicket/cli/commands/generate.py"> 111"""Generate static HTML website from thicket data.""" 112 113import base64 114import json 115import re 116import shutil 117from datetime import datetime 118from pathlib import Path 119from typing import Any, Optional, TypedDict, Union 120 121import typer 122from jinja2 import Environment, FileSystemLoader, select_autoescape 123from rich.progress import Progress, SpinnerColumn, TextColumn 124 125from ...core.git_store import GitStore 126from ...models.feed import AtomEntry 127from ...models.user import GitStoreIndex, UserMetadata 128from ..main import app 129from ..utils import console, load_config 130 131 132class UserData(TypedDict): 133 """Type definition for user data structure.""" 134 135 metadata: UserMetadata 136 recent_entries: list[tuple[str, AtomEntry]] 137 138 139def safe_anchor_id(atom_id: str) -> str: 140 """Convert an Atom ID to a safe HTML anchor ID.""" 141 # Use base64 URL-safe encoding without padding 142 encoded = base64.urlsafe_b64encode(atom_id.encode('utf-8')).decode('ascii').rstrip('=') 143 # Prefix with 'id' to ensure it starts with a letter (HTML requirement) 144 return f"id{encoded}" 145 146 147class WebsiteGenerator: 148 """Generate static HTML website from thicket data.""" 149 150 def __init__(self, git_store: GitStore, output_dir: Path): 151 self.git_store = git_store 152 self.output_dir = output_dir 153 self.template_dir = Path(__file__).parent.parent.parent / "templates" 154 155 # Initialize Jinja2 environment 156 self.env = Environment( 157 loader=FileSystemLoader(self.template_dir), 158 autoescape=select_autoescape(["html", "xml"]), 159 ) 160 161 # Data containers 162 self.index: Optional[GitStoreIndex] = 
None 163 self.entries: list[tuple[str, AtomEntry]] = [] # (username, entry) 164 self.links_data: Optional[dict[str, Any]] = None 165 self.threads: list[list[dict[str, Any]]] = [] # List of threads with metadata 166 167 def get_display_name(self, username: str) -> str: 168 """Get display name for a user, falling back to username.""" 169 if self.index and username in self.index.users: 170 user = self.index.users[username] 171 return user.display_name or username 172 return username 173 174 def get_user_homepage(self, username: str) -> Optional[str]: 175 """Get homepage URL for a user.""" 176 if self.index and username in self.index.users: 177 user = self.index.users[username] 178 return str(user.homepage) if user.homepage else None 179 return None 180 181 def clean_html_summary(self, content: Optional[str], max_length: int = 200) -> str: 182 """Clean HTML content and truncate for display in timeline.""" 183 if not content: 184 return "" 185 186 # Remove HTML tags 187 clean_text = re.sub(r"<[^>]+>", " ", content) 188 # Replace multiple whitespace with single space 189 clean_text = re.sub(r"\s+", " ", clean_text) 190 # Strip leading/trailing whitespace 191 clean_text = clean_text.strip() 192 193 # Truncate with ellipsis if needed 194 if len(clean_text) > max_length: 195 # Try to break at word boundary 196 truncated = clean_text[:max_length] 197 last_space = truncated.rfind(" ") 198 if ( 199 last_space > max_length * 0.8 200 ): # If we can break reasonably close to the limit 201 clean_text = truncated[:last_space] + "..." 202 else: 203 clean_text = truncated + "..." 
204 205 return clean_text 206 207 def load_data(self) -> None: 208 """Load all data from the git repository.""" 209 with Progress( 210 SpinnerColumn(), 211 TextColumn("[progress.description]{task.description}"), 212 console=console, 213 ) as progress: 214 # Load index 215 task = progress.add_task("Loading repository index...", total=None) 216 self.index = self.git_store._load_index() 217 if not self.index: 218 raise ValueError("No index found in repository") 219 progress.update(task, completed=True) 220 221 # Load all entries 222 task = progress.add_task("Loading entries...", total=None) 223 for username, user_metadata in self.index.users.items(): 224 user_dir = self.git_store.repo_path / user_metadata.directory 225 if user_dir.exists(): 226 for entry_file in user_dir.glob("*.json"): 227 if entry_file.name not in ["index.json", "duplicates.json"]: 228 try: 229 with open(entry_file) as f: 230 entry_data = json.load(f) 231 entry = AtomEntry(**entry_data) 232 self.entries.append((username, entry)) 233 except Exception as e: 234 console.print( 235 f"[yellow]Warning: Failed to load {entry_file}: {e}[/yellow]" 236 ) 237 progress.update(task, completed=True) 238 239 # Sort entries by date (newest first) - prioritize updated over published 240 self.entries.sort( 241 key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True 242 ) 243 244 # Load links data 245 task = progress.add_task("Loading links and references...", total=None) 246 links_file = self.git_store.repo_path / "links.json" 247 if links_file.exists(): 248 with open(links_file) as f: 249 self.links_data = json.load(f) 250 progress.update(task, completed=True) 251 252 def build_threads(self) -> None: 253 """Build threaded conversations from references.""" 254 if not self.links_data or "references" not in self.links_data: 255 return 256 257 # Map entry IDs to (username, entry) tuples 258 entry_map: dict[str, tuple[str, AtomEntry]] = {} 259 for username, entry in self.entries: 260 
entry_map[entry.id] = (username, entry) 261 262 # Build adjacency lists for references 263 self.outbound_refs: dict[str, set[str]] = {} 264 self.inbound_refs: dict[str, set[str]] = {} 265 self.reference_details: dict[ 266 str, list[dict[str, Any]] 267 ] = {} # Store full reference info 268 269 for ref in self.links_data["references"]: 270 source_id = ref["source_entry_id"] 271 target_id = ref.get("target_entry_id") 272 273 if target_id and source_id in entry_map and target_id in entry_map: 274 self.outbound_refs.setdefault(source_id, set()).add(target_id) 275 self.inbound_refs.setdefault(target_id, set()).add(source_id) 276 277 # Store reference details for UI 278 self.reference_details.setdefault(source_id, []).append( 279 { 280 "target_id": target_id, 281 "target_username": ref.get("target_username"), 282 "type": "outbound", 283 } 284 ) 285 self.reference_details.setdefault(target_id, []).append( 286 { 287 "source_id": source_id, 288 "source_username": ref.get("source_username"), 289 "type": "inbound", 290 } 291 ) 292 293 # Find conversation threads (multi-post discussions) 294 processed = set() 295 296 for entry_id, (_username, _entry) in entry_map.items(): 297 if entry_id in processed: 298 continue 299 300 # Build thread starting from this entry 301 thread = [] 302 to_visit = [entry_id] 303 thread_ids = set() 304 level_map: dict[str, int] = {} # Track levels for this thread 305 306 # First, traverse up to find the root 307 current = entry_id 308 while current in self.inbound_refs: 309 parents = self.inbound_refs[current] - { 310 current 311 } # Exclude self-references 312 if not parents: 313 break 314 # Take the first parent 315 parent = next(iter(parents)) 316 if parent in thread_ids: # Avoid cycles 317 break 318 current = parent 319 to_visit.insert(0, current) 320 321 # Now traverse down from the root 322 while to_visit: 323 current = to_visit.pop(0) 324 if current in thread_ids or current not in entry_map: 325 continue 326 327 thread_ids.add(current) 328 
username, entry = entry_map[current] 329 330 # Calculate thread level 331 thread_level = self._calculate_thread_level(current, level_map) 332 333 # Add threading metadata 334 thread_entry = { 335 "username": username, 336 "display_name": self.get_display_name(username), 337 "entry": entry, 338 "entry_id": current, 339 "references_to": list(self.outbound_refs.get(current, [])), 340 "referenced_by": list(self.inbound_refs.get(current, [])), 341 "thread_level": thread_level, 342 } 343 thread.append(thread_entry) 344 processed.add(current) 345 346 # Add children 347 if current in self.outbound_refs: 348 children = self.outbound_refs[current] - thread_ids # Avoid cycles 349 to_visit.extend(sorted(children)) 350 351 if len(thread) > 1: # Only keep actual threads 352 # Sort thread by date (newest first) - prioritize updated over published 353 thread.sort(key=lambda x: x["entry"].updated or x["entry"].published or datetime.min, reverse=True) # type: ignore 354 self.threads.append(thread) 355 356 # Sort threads by the date of their most recent entry - prioritize updated over published 357 self.threads.sort( 358 key=lambda t: max( 359 item["entry"].updated or item["entry"].published or datetime.min for item in t 360 ), 361 reverse=True, 362 ) 363 364 def _calculate_thread_level( 365 self, entry_id: str, processed_entries: dict[str, int] 366 ) -> int: 367 """Calculate indentation level for threaded display.""" 368 if entry_id in processed_entries: 369 return processed_entries[entry_id] 370 371 if entry_id not in self.inbound_refs: 372 processed_entries[entry_id] = 0 373 return 0 374 375 parents_in_thread = self.inbound_refs[entry_id] & set(processed_entries.keys()) 376 if not parents_in_thread: 377 processed_entries[entry_id] = 0 378 return 0 379 380 # Find the deepest parent level + 1 381 max_parent_level = 0 382 for parent_id in parents_in_thread: 383 parent_level = self._calculate_thread_level(parent_id, processed_entries) 384 max_parent_level = max(max_parent_level, 
parent_level) 385 386 level = min(max_parent_level + 1, 4) # Cap at level 4 387 processed_entries[entry_id] = level 388 return level 389 390 def get_standalone_references(self) -> list[dict[str, Any]]: 391 """Get posts that have references but aren't part of multi-post threads.""" 392 if not hasattr(self, "reference_details"): 393 return [] 394 395 threaded_entry_ids = set() 396 for thread in self.threads: 397 for item in thread: 398 threaded_entry_ids.add(item["entry_id"]) 399 400 standalone_refs = [] 401 for username, entry in self.entries: 402 if ( 403 entry.id in self.reference_details 404 and entry.id not in threaded_entry_ids 405 ): 406 refs = self.reference_details[entry.id] 407 # Only include if it has meaningful references (not just self-references) 408 meaningful_refs = [ 409 r 410 for r in refs 411 if r.get("target_id") != entry.id and r.get("source_id") != entry.id 412 ] 413 if meaningful_refs: 414 standalone_refs.append( 415 { 416 "username": username, 417 "display_name": self.get_display_name(username), 418 "entry": entry, 419 "references": meaningful_refs, 420 } 421 ) 422 423 return standalone_refs 424 425 def _add_cross_thread_links(self, timeline_items: list[dict[str, Any]]) -> None: 426 """Add cross-thread linking for entries that appear in multiple threads.""" 427 # Map entry IDs to their positions in the timeline 428 entry_positions: dict[str, list[int]] = {} 429 # Map URLs referenced by entries to the entries that reference them 430 url_references: dict[str, list[tuple[str, int]]] = {} # url -> [(entry_id, position)] 431 432 # First pass: collect all entry IDs, their positions, and referenced URLs 433 for i, item in enumerate(timeline_items): 434 if item["type"] == "post": 435 entry_id = item["content"]["entry"].id 436 entry_positions.setdefault(entry_id, []).append(i) 437 # Track URLs this entry references 438 if entry_id in self.reference_details: 439 for ref in self.reference_details[entry_id]: 440 if ref["type"] == "outbound" and 
"target_id" in ref: 441 # Find the target entry's URL if available 442 target_entry = self._find_entry_by_id(ref["target_id"]) 443 if target_entry and target_entry.link: 444 url = str(target_entry.link) 445 url_references.setdefault(url, []).append((entry_id, i)) 446 elif item["type"] == "thread": 447 for thread_item in item["content"]: 448 entry_id = thread_item["entry"].id 449 entry_positions.setdefault(entry_id, []).append(i) 450 # Track URLs this entry references 451 if entry_id in self.reference_details: 452 for ref in self.reference_details[entry_id]: 453 if ref["type"] == "outbound" and "target_id" in ref: 454 target_entry = self._find_entry_by_id(ref["target_id"]) 455 if target_entry and target_entry.link: 456 url = str(target_entry.link) 457 url_references.setdefault(url, []).append((entry_id, i)) 458 459 # Build cross-thread connections - only for entries that actually appear multiple times 460 cross_thread_connections: dict[str, set[int]] = {} # entry_id -> set of timeline positions 461 462 # Add connections ONLY for entries that appear multiple times in the timeline 463 for entry_id, positions in entry_positions.items(): 464 if len(positions) > 1: 465 cross_thread_connections[entry_id] = set(positions) 466 # Debug: uncomment to see which entries have multiple appearances 467 # print(f"Entry {entry_id[:50]}... 
appears at positions: {positions}") 468 469 # Apply cross-thread links to timeline items 470 for entry_id, positions_set in cross_thread_connections.items(): 471 positions_list = list(positions_set) 472 for pos in positions_list: 473 item = timeline_items[pos] 474 other_positions = sorted([p for p in positions_list if p != pos]) 475 476 if item["type"] == "post": 477 # Add cross-thread info to individual posts 478 item["content"]["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items) 479 # Add info about shared references 480 item["content"]["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items) 481 elif item["type"] == "thread": 482 # Add cross-thread info to thread items 483 for thread_item in item["content"]: 484 if thread_item["entry"].id == entry_id: 485 thread_item["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items) 486 thread_item["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items) 487 break 488 489 def _build_cross_thread_link_data(self, entry_id: str, other_positions: list[int], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]: 490 """Build detailed cross-thread link data with anchor information.""" 491 cross_thread_links = [] 492 493 for pos in other_positions: 494 item = timeline_items[pos] 495 if item["type"] == "post": 496 # For individual posts 497 safe_id = safe_anchor_id(entry_id) 498 cross_thread_links.append({ 499 "position": pos, 500 "anchor_id": f"post-{pos}-{safe_id}", 501 "context": "individual post", 502 "title": item["content"]["entry"].title 503 }) 504 elif item["type"] == "thread": 505 # For thread items, find the specific thread item 506 for thread_idx, thread_item in enumerate(item["content"]): 507 if thread_item["entry"].id == entry_id: 508 safe_id = safe_anchor_id(entry_id) 509 cross_thread_links.append({ 510 "position": pos, 511 "anchor_id": 
f"post-{pos}-{thread_idx}-{safe_id}", 512 "context": f"thread (level {thread_item.get('thread_level', 0)})", 513 "title": thread_item["entry"].title 514 }) 515 break 516 517 return cross_thread_links 518 519 def _find_entry_by_id(self, entry_id: str) -> Optional[AtomEntry]: 520 """Find an entry by its ID.""" 521 for _username, entry in self.entries: 522 if entry.id == entry_id: 523 return entry 524 return None 525 526 def _get_shared_references(self, entry_id: str, positions: Union[set[int], list[int]], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]: 527 """Get information about shared references between cross-thread entries.""" 528 shared_refs = [] 529 530 # Collect all referenced URLs from entries at these positions 531 url_counts: dict[str, int] = {} 532 referencing_entries: dict[str, list[str]] = {} # url -> [entry_ids] 533 534 for pos in positions: 535 item = timeline_items[pos] 536 entries_to_check = [] 537 538 if item["type"] == "post": 539 entries_to_check.append(item["content"]["entry"]) 540 elif item["type"] == "thread": 541 entries_to_check.extend([ti["entry"] for ti in item["content"]]) 542 543 for entry in entries_to_check: 544 if entry.id in self.reference_details: 545 for ref in self.reference_details[entry.id]: 546 if ref["type"] == "outbound" and "target_id" in ref: 547 target_entry = self._find_entry_by_id(ref["target_id"]) 548 if target_entry and target_entry.link: 549 url = str(target_entry.link) 550 url_counts[url] = url_counts.get(url, 0) + 1 551 if url not in referencing_entries: 552 referencing_entries[url] = [] 553 if entry.id not in referencing_entries[url]: 554 referencing_entries[url].append(entry.id) 555 556 # Find URLs referenced by multiple entries 557 for url, count in url_counts.items(): 558 if count > 1 and len(referencing_entries[url]) > 1: 559 # Get the target entry info 560 target_entry = None 561 target_username = None 562 for ref in (self.links_data or {}).get("references", []): 563 if ref.get("target_url") == 
url: 564 target_username = ref.get("target_username") 565 if ref.get("target_entry_id"): 566 target_entry = self._find_entry_by_id(ref["target_entry_id"]) 567 break 568 569 shared_refs.append({ 570 "url": url, 571 "count": count, 572 "referencing_entries": referencing_entries[url], 573 "target_username": target_username, 574 "target_title": target_entry.title if target_entry else None 575 }) 576 577 return sorted(shared_refs, key=lambda x: x["count"], reverse=True) 578 579 def generate_site(self) -> None: 580 """Generate the static website.""" 581 # Create output directory 582 self.output_dir.mkdir(parents=True, exist_ok=True) 583 584 # Create static directories 585 (self.output_dir / "css").mkdir(exist_ok=True) 586 (self.output_dir / "js").mkdir(exist_ok=True) 587 588 # Generate CSS 589 css_template = self.env.get_template("style.css") 590 css_content = css_template.render() 591 with open(self.output_dir / "css" / "style.css", "w") as f: 592 f.write(css_content) 593 594 # Generate JavaScript 595 js_template = self.env.get_template("script.js") 596 js_content = js_template.render() 597 with open(self.output_dir / "js" / "script.js", "w") as f: 598 f.write(js_content) 599 600 # Prepare common template data 601 base_data = { 602 "title": "Energy & Environment Group", 603 "generated_at": datetime.now().isoformat(), 604 "get_display_name": self.get_display_name, 605 "get_user_homepage": self.get_user_homepage, 606 "clean_html_summary": self.clean_html_summary, 607 "safe_anchor_id": safe_anchor_id, 608 } 609 610 # Build unified timeline 611 timeline_items = [] 612 613 # Only consider the threads that will actually be displayed 614 displayed_threads = self.threads[:20] # Limit to 20 threads 615 616 # Track which entries are part of displayed threads 617 threaded_entry_ids = set() 618 for thread in displayed_threads: 619 for item in thread: 620 threaded_entry_ids.add(item["entry_id"]) 621 622 # Add threads to timeline (using the date of the most recent post) 623 for 
thread in displayed_threads: 624 most_recent_date = max( 625 item["entry"].updated or item["entry"].published or datetime.min 626 for item in thread 627 ) 628 timeline_items.append({ 629 "type": "thread", 630 "date": most_recent_date, 631 "content": thread 632 }) 633 634 # Add individual posts (not in threads) 635 for username, entry in self.entries[:50]: 636 if entry.id not in threaded_entry_ids: 637 # Check if this entry has references 638 has_refs = ( 639 entry.id in self.reference_details 640 if hasattr(self, "reference_details") 641 else False 642 ) 643 644 refs = [] 645 if has_refs: 646 refs = self.reference_details.get(entry.id, []) 647 refs = [ 648 r for r in refs 649 if r.get("target_id") != entry.id 650 and r.get("source_id") != entry.id 651 ] 652 653 timeline_items.append({ 654 "type": "post", 655 "date": entry.updated or entry.published or datetime.min, 656 "content": { 657 "username": username, 658 "display_name": self.get_display_name(username), 659 "entry": entry, 660 "references": refs if refs else None 661 } 662 }) 663 664 # Sort unified timeline by date (newest first) 665 timeline_items.sort(key=lambda x: x["date"], reverse=True) 666 667 # Limit timeline to what will actually be rendered 668 timeline_items = timeline_items[:50] # Limit to 50 items total 669 670 # Add cross-thread linking for repeat blog references 671 self._add_cross_thread_links(timeline_items) 672 673 # Prepare outgoing links data 674 outgoing_links = [] 675 if self.links_data and "links" in self.links_data: 676 for url, link_info in self.links_data["links"].items(): 677 referencing_entries = [] 678 for entry_id in link_info.get("referencing_entries", []): 679 for username, entry in self.entries: 680 if entry.id == entry_id: 681 referencing_entries.append( 682 (self.get_display_name(username), entry) 683 ) 684 break 685 686 if referencing_entries: 687 # Sort by date - prioritize updated over published 688 referencing_entries.sort( 689 key=lambda x: x[1].updated or x[1].published 
or datetime.min, reverse=True 690 ) 691 outgoing_links.append( 692 { 693 "url": url, 694 "target_username": link_info.get("target_username"), 695 "entries": referencing_entries, 696 } 697 ) 698 699 # Sort links by most recent reference - prioritize updated over published 700 outgoing_links.sort( 701 key=lambda x: x["entries"][0][1].updated 702 or x["entries"][0][1].published or datetime.min, 703 reverse=True, 704 ) 705 706 # Prepare users data 707 users: list[UserData] = [] 708 if self.index: 709 for username, user_metadata in self.index.users.items(): 710 # Get recent entries for this user with display names 711 user_entries = [ 712 (self.get_display_name(u), e) 713 for u, e in self.entries 714 if u == username 715 ][:5] 716 users.append( 717 {"metadata": user_metadata, "recent_entries": user_entries} 718 ) 719 # Sort by entry count 720 users.sort(key=lambda x: x["metadata"].entry_count, reverse=True) 721 722 # Generate timeline page 723 timeline_template = self.env.get_template("timeline.html") 724 timeline_content = timeline_template.render( 725 **base_data, 726 page="timeline", 727 timeline_items=timeline_items, # Already limited above 728 ) 729 with open(self.output_dir / "timeline.html", "w") as f: 730 f.write(timeline_content) 731 732 # Generate links page 733 links_template = self.env.get_template("links.html") 734 links_content = links_template.render( 735 **base_data, 736 page="links", 737 outgoing_links=outgoing_links[:100], 738 ) 739 with open(self.output_dir / "links.html", "w") as f: 740 f.write(links_content) 741 742 # Generate users page 743 users_template = self.env.get_template("users.html") 744 users_content = users_template.render( 745 **base_data, 746 page="users", 747 users=users, 748 ) 749 with open(self.output_dir / "users.html", "w") as f: 750 f.write(users_content) 751 752 # Generate main index page (redirect to timeline) 753 index_template = self.env.get_template("index.html") 754 index_content = index_template.render(**base_data) 755 
with open(self.output_dir / "index.html", "w") as f: 756 f.write(index_content) 757 758 console.print(f"[green]✓[/green] Generated website at {self.output_dir}") 759 console.print(f" - {len(self.entries)} entries") 760 console.print(f" - {len(self.threads)} conversation threads") 761 console.print(f" - {len(outgoing_links)} outgoing links") 762 console.print(f" - {len(users)} users") 763 console.print( 764 " - Generated pages: index.html, timeline.html, links.html, users.html" 765 ) 766 767 768@app.command() 769def generate( 770 output: Path = typer.Option( 771 Path("./thicket-site"), 772 "--output", 773 "-o", 774 help="Output directory for the generated website", 775 ), 776 force: bool = typer.Option( 777 False, "--force", "-f", help="Overwrite existing output directory" 778 ), 779 config_file: Path = typer.Option( 780 Path("thicket.yaml"), "--config", help="Configuration file path" 781 ), 782) -> None: 783 """Generate a static HTML website from thicket data.""" 784 config = load_config(config_file) 785 786 if not config.git_store: 787 console.print("[red]No git store path configured[/red]") 788 raise typer.Exit(1) 789 790 git_store = GitStore(config.git_store) 791 792 # Check if output directory exists 793 if output.exists() and not force: 794 console.print( 795 f"[red]Output directory {output} already exists. 
Use --force to overwrite.[/red]" 796 ) 797 raise typer.Exit(1) 798 799 # Clean output directory if forcing 800 if output.exists() and force: 801 shutil.rmtree(output) 802 803 try: 804 generator = WebsiteGenerator(git_store, output) 805 806 console.print("[bold]Generating static website...[/bold]") 807 generator.load_data() 808 generator.build_threads() 809 generator.generate_site() 810 811 except Exception as e: 812 console.print(f"[red]Error generating website: {e}[/red]") 813 raise typer.Exit(1) from e 814</file> 815 816<file path="src/thicket/templates/base.html"> 817<!DOCTYPE html> 818<html lang="en"> 819<head> 820 <meta charset="UTF-8"> 821 <meta name="viewport" content="width=device-width, initial-scale=1.0"> 822 <title>{% block page_title %}{{ title }}{% endblock %}</title> 823 <link rel="stylesheet" href="css/style.css"> 824</head> 825<body> 826 <header class="site-header"> 827 <div class="header-content"> 828 <h1 class="site-title">{{ title }}</h1> 829 <nav class="site-nav"> 830 <a href="timeline.html" class="nav-link {% if page == 'timeline' %}active{% endif %}">Timeline</a> 831 <a href="links.html" class="nav-link {% if page == 'links' %}active{% endif %}">Links</a> 832 <a href="users.html" class="nav-link {% if page == 'users' %}active{% endif %}">Users</a> 833 </nav> 834 </div> 835 </header> 836 837 <main class="main-content"> 838 {% block content %}{% endblock %} 839 </main> 840 841 <footer class="site-footer"> 842 <p>Generated on {{ generated_at }} by <a href="https://github.com/avsm/thicket">Thicket</a></p> 843 </footer> 844 845 <script src="js/script.js"></script> 846</body> 847</html> 848</file> 849 850<file path="src/thicket/templates/index.html"> 851<!DOCTYPE html> 852<html lang="en"> 853<head> 854 <meta charset="UTF-8"> 855 <meta name="viewport" content="width=device-width, initial-scale=1.0"> 856 <title>{{ title }}</title> 857 <meta http-equiv="refresh" content="0; url=timeline.html"> 858 <link rel="canonical" href="timeline.html"> 859</head> 
860<body> 861 <p>Redirecting to <a href="timeline.html">Timeline</a>...</p> 862</body> 863</html> 864</file> 865 866<file path="src/thicket/templates/links.html"> 867{% extends "base.html" %} 868 869{% block page_title %}Outgoing Links - {{ title }}{% endblock %} 870 871{% block content %} 872<div class="page-content"> 873 <h2>Outgoing Links</h2> 874 <p class="page-description">External links referenced in blog posts, ordered by most recent reference.</p> 875 876 {% for link in outgoing_links %} 877 <article class="link-group"> 878 <h3 class="link-url"> 879 <a href="{{ link.url }}" target="_blank">{{ link.url|truncate(80) }}</a> 880 {% if link.target_username %} 881 <span class="target-user">({{ link.target_username }})</span> 882 {% endif %} 883 </h3> 884 <div class="referencing-entries"> 885 <span class="ref-count">Referenced in {{ link.entries|length }} post(s):</span> 886 <ul> 887 {% for display_name, entry in link.entries[:5] %} 888 <li> 889 <span class="author">{{ display_name }}</span> - 890 <a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a> 891 <time datetime="{{ entry.updated or entry.published }}"> 892 ({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }}) 893 </time> 894 </li> 895 {% endfor %} 896 {% if link.entries|length > 5 %} 897 <li class="more">... 
and {{ link.entries|length - 5 }} more</li> 898 {% endif %} 899 </ul> 900 </div> 901 </article> 902 {% endfor %} 903</div> 904{% endblock %} 905</file> 906 907<file path="src/thicket/templates/script.js"> 908// Enhanced functionality for thicket website 909document.addEventListener('DOMContentLoaded', function() { 910 911 // Enhance thread collapsing (optional feature) 912 const threadHeaders = document.querySelectorAll('.thread-header'); 913 threadHeaders.forEach(header => { 914 header.style.cursor = 'pointer'; 915 header.addEventListener('click', function() { 916 const thread = this.parentElement; 917 const entries = thread.querySelectorAll('.thread-entry'); 918 919 // Toggle visibility of all but the first entry 920 for (let i = 1; i < entries.length; i++) { 921 entries[i].style.display = entries[i].style.display === 'none' ? 'block' : 'none'; 922 } 923 924 // Update thread count text 925 const count = this.querySelector('.thread-count'); 926 if (entries[1] && entries[1].style.display === 'none') { 927 count.textContent = count.textContent.replace('posts', 'posts (collapsed)'); 928 } else { 929 count.textContent = count.textContent.replace(' (collapsed)', ''); 930 } 931 }); 932 }); 933 934 // Add relative time display 935 const timeElements = document.querySelectorAll('time'); 936 timeElements.forEach(timeEl => { 937 const datetime = new Date(timeEl.getAttribute('datetime')); 938 const now = new Date(); 939 const diffMs = now - datetime; 940 const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); 941 942 let relativeTime; 943 if (diffDays === 0) { 944 const diffHours = Math.floor(diffMs / (1000 * 60 * 60)); 945 if (diffHours === 0) { 946 const diffMinutes = Math.floor(diffMs / (1000 * 60)); 947 relativeTime = diffMinutes === 0 ? 
'just now' : `${diffMinutes}m ago`; 948 } else { 949 relativeTime = `${diffHours}h ago`; 950 } 951 } else if (diffDays === 1) { 952 relativeTime = 'yesterday'; 953 } else if (diffDays < 7) { 954 relativeTime = `${diffDays}d ago`; 955 } else if (diffDays < 30) { 956 const weeks = Math.floor(diffDays / 7); 957 relativeTime = weeks === 1 ? '1w ago' : `${weeks}w ago`; 958 } else if (diffDays < 365) { 959 const months = Math.floor(diffDays / 30); 960 relativeTime = months === 1 ? '1mo ago' : `${months}mo ago`; 961 } else { 962 const years = Math.floor(diffDays / 365); 963 relativeTime = years === 1 ? '1y ago' : `${years}y ago`; 964 } 965 966 // Add relative time as title attribute 967 timeEl.setAttribute('title', timeEl.textContent); 968 timeEl.textContent = relativeTime; 969 }); 970 971 // Enhanced anchor link scrolling for shared references 972 document.querySelectorAll('a[href^="#"]').forEach(anchor => { 973 anchor.addEventListener('click', function (e) { 974 e.preventDefault(); 975 const target = document.querySelector(this.getAttribute('href')); 976 if (target) { 977 target.scrollIntoView({ 978 behavior: 'smooth', 979 block: 'center' 980 }); 981 982 // Highlight the target briefly 983 const timelineEntry = target.closest('.timeline-entry'); 984 if (timelineEntry) { 985 timelineEntry.style.outline = '2px solid var(--primary-color)'; 986 timelineEntry.style.borderRadius = '8px'; 987 setTimeout(() => { 988 timelineEntry.style.outline = ''; 989 timelineEntry.style.borderRadius = ''; 990 }, 2000); 991 } 992 } 993 }); 994 }); 995}); 996</file> 997 998<file path="src/thicket/templates/style.css"> 999/* Modern, clean design with high-density text and readable theme */ 1000 1001:root { 1002 --primary-color: #2c3e50; 1003 --secondary-color: #3498db; 1004 --accent-color: #e74c3c; 1005 --background: #ffffff; 1006 --surface: #f8f9fa; 1007 --text-primary: #2c3e50; 1008 --text-secondary: #7f8c8d; 1009 --border-color: #e0e0e0; 1010 --thread-indent: 20px; 1011 --max-width: 1200px; 
1012} 1013 1014* { 1015 margin: 0; 1016 padding: 0; 1017 box-sizing: border-box; 1018} 1019 1020body { 1021 font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif; 1022 font-size: 14px; 1023 line-height: 1.6; 1024 color: var(--text-primary); 1025 background-color: var(--background); 1026} 1027 1028/* Header */ 1029.site-header { 1030 background-color: var(--surface); 1031 border-bottom: 1px solid var(--border-color); 1032 padding: 0.75rem 0; 1033 position: sticky; 1034 top: 0; 1035 z-index: 100; 1036} 1037 1038.header-content { 1039 max-width: var(--max-width); 1040 margin: 0 auto; 1041 padding: 0 2rem; 1042 display: flex; 1043 justify-content: space-between; 1044 align-items: center; 1045} 1046 1047.site-title { 1048 font-size: 1.5rem; 1049 font-weight: 600; 1050 color: var(--primary-color); 1051 margin: 0; 1052} 1053 1054/* Navigation */ 1055.site-nav { 1056 display: flex; 1057 gap: 1.5rem; 1058} 1059 1060.nav-link { 1061 text-decoration: none; 1062 color: var(--text-secondary); 1063 font-weight: 500; 1064 font-size: 0.95rem; 1065 padding: 0.5rem 0.75rem; 1066 border-radius: 4px; 1067 transition: all 0.2s ease; 1068} 1069 1070.nav-link:hover { 1071 color: var(--primary-color); 1072 background-color: var(--background); 1073} 1074 1075.nav-link.active { 1076 color: var(--secondary-color); 1077 background-color: var(--background); 1078 font-weight: 600; 1079} 1080 1081/* Main Content */ 1082.main-content { 1083 max-width: var(--max-width); 1084 margin: 2rem auto; 1085 padding: 0 2rem; 1086} 1087 1088.page-content { 1089 margin: 0; 1090} 1091 1092.page-description { 1093 color: var(--text-secondary); 1094 margin-bottom: 1.5rem; 1095 font-style: italic; 1096} 1097 1098/* Sections */ 1099section { 1100 margin-bottom: 2rem; 1101} 1102 1103h2 { 1104 font-size: 1.3rem; 1105 font-weight: 600; 1106 margin-bottom: 0.75rem; 1107 color: var(--primary-color); 1108} 1109 1110h3 { 1111 font-size: 1.1rem; 1112 font-weight: 600; 
1113 margin-bottom: 0.75rem; 1114 color: var(--primary-color); 1115} 1116 1117/* Entries and Threads */ 1118article { 1119 margin-bottom: 1.5rem; 1120 padding: 1rem; 1121 background-color: var(--surface); 1122 border-radius: 4px; 1123 border: 1px solid var(--border-color); 1124} 1125 1126/* Timeline-style entries */ 1127.timeline-entry { 1128 margin-bottom: 0.5rem; 1129 padding: 0.5rem 0.75rem; 1130 border: none; 1131 background: transparent; 1132 transition: background-color 0.2s ease; 1133} 1134 1135.timeline-entry:hover { 1136 background-color: var(--surface); 1137} 1138 1139.timeline-meta { 1140 display: inline-flex; 1141 gap: 0.5rem; 1142 align-items: center; 1143 font-size: 0.75rem; 1144 color: var(--text-secondary); 1145 margin-bottom: 0.25rem; 1146} 1147 1148.timeline-time { 1149 font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace; 1150 font-size: 0.75rem; 1151 color: var(--text-secondary); 1152} 1153 1154.timeline-author { 1155 font-weight: 600; 1156 color: var(--primary-color); 1157 font-size: 0.8rem; 1158 text-decoration: none; 1159} 1160 1161.timeline-author:hover { 1162 color: var(--secondary-color); 1163 text-decoration: underline; 1164} 1165 1166.timeline-content { 1167 line-height: 1.4; 1168} 1169 1170.timeline-title { 1171 font-size: 0.95rem; 1172 font-weight: 600; 1173} 1174 1175.timeline-title a { 1176 color: var(--primary-color); 1177 text-decoration: none; 1178} 1179 1180.timeline-title a:hover { 1181 color: var(--secondary-color); 1182 text-decoration: underline; 1183} 1184 1185.timeline-summary { 1186 color: var(--text-secondary); 1187 font-size: 0.9rem; 1188 line-height: 1.4; 1189} 1190 1191/* Legacy styles for other sections */ 1192.entry-meta, .thread-header { 1193 display: flex; 1194 gap: 1rem; 1195 align-items: center; 1196 margin-bottom: 0.5rem; 1197 font-size: 0.85rem; 1198 color: var(--text-secondary); 1199} 1200 1201.author { 1202 font-weight: 600; 1203 color: var(--primary-color); 1204} 1205 1206time { 1207 font-size: 
0.85rem; 1208} 1209 1210h4 { 1211 font-size: 1.1rem; 1212 font-weight: 600; 1213 margin-bottom: 0.5rem; 1214} 1215 1216h4 a { 1217 color: var(--primary-color); 1218 text-decoration: none; 1219} 1220 1221h4 a:hover { 1222 color: var(--secondary-color); 1223 text-decoration: underline; 1224} 1225 1226.entry-summary { 1227 color: var(--text-primary); 1228 line-height: 1.5; 1229 margin-top: 0.5rem; 1230} 1231 1232/* Enhanced Threading Styles */ 1233 1234/* Conversation Clusters */ 1235.conversation-cluster { 1236 background-color: var(--background); 1237 border: 2px solid var(--border-color); 1238 border-radius: 8px; 1239 margin-bottom: 2rem; 1240 overflow: hidden; 1241 box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); 1242} 1243 1244.conversation-header { 1245 background: linear-gradient(135deg, var(--surface) 0%, #f1f3f4 100%); 1246 padding: 0.75rem 1rem; 1247 border-bottom: 1px solid var(--border-color); 1248} 1249 1250.conversation-meta { 1251 display: flex; 1252 justify-content: space-between; 1253 align-items: center; 1254 flex-wrap: wrap; 1255 gap: 0.5rem; 1256} 1257 1258.conversation-count { 1259 font-weight: 600; 1260 color: var(--secondary-color); 1261 font-size: 0.9rem; 1262} 1263 1264.conversation-participants { 1265 font-size: 0.8rem; 1266 color: var(--text-secondary); 1267 flex: 1; 1268 text-align: right; 1269} 1270 1271.conversation-flow { 1272 padding: 0.5rem; 1273} 1274 1275/* Threaded Conversation Entries */ 1276.conversation-entry { 1277 position: relative; 1278 margin-bottom: 0.75rem; 1279 display: flex; 1280 align-items: flex-start; 1281} 1282 1283.conversation-entry.level-0 { 1284 margin-left: 0; 1285} 1286 1287.conversation-entry.level-1 { 1288 margin-left: 1.5rem; 1289} 1290 1291.conversation-entry.level-2 { 1292 margin-left: 3rem; 1293} 1294 1295.conversation-entry.level-3 { 1296 margin-left: 4.5rem; 1297} 1298 1299.conversation-entry.level-4 { 1300 margin-left: 6rem; 1301} 1302 1303.entry-connector { 1304 width: 3px; 1305 background-color: 
var(--secondary-color); 1306 margin-right: 0.75rem; 1307 margin-top: 0.25rem; 1308 min-height: 2rem; 1309 border-radius: 2px; 1310 opacity: 0.6; 1311} 1312 1313.conversation-entry.level-0 .entry-connector { 1314 background-color: var(--accent-color); 1315 opacity: 0.8; 1316} 1317 1318.entry-content { 1319 flex: 1; 1320 background-color: var(--surface); 1321 padding: 0.75rem; 1322 border-radius: 6px; 1323 border: 1px solid var(--border-color); 1324 transition: all 0.2s ease; 1325} 1326 1327.entry-content:hover { 1328 border-color: var(--secondary-color); 1329 box-shadow: 0 2px 8px rgba(52, 152, 219, 0.1); 1330} 1331 1332/* Reference Indicators */ 1333.reference-indicators { 1334 display: inline-flex; 1335 gap: 0.25rem; 1336 margin-left: 0.5rem; 1337} 1338 1339.ref-out, .ref-in { 1340 display: inline-block; 1341 width: 1rem; 1342 height: 1rem; 1343 border-radius: 50%; 1344 text-align: center; 1345 line-height: 1rem; 1346 font-size: 0.7rem; 1347 font-weight: bold; 1348} 1349 1350.ref-out { 1351 background-color: #e8f5e8; 1352 color: #2d8f2d; 1353} 1354 1355.ref-in { 1356 background-color: #e8f0ff; 1357 color: #1f5fbf; 1358} 1359 1360/* Reference Badges for Individual Posts */ 1361.timeline-entry.with-references { 1362 background-color: var(--surface); 1363} 1364 1365/* Conversation posts in unified timeline */ 1366.timeline-entry.conversation-post { 1367 background: transparent; 1368 border: none; 1369 margin-bottom: 0.5rem; 1370 padding: 0.5rem 0.75rem; 1371} 1372 1373.timeline-entry.conversation-post.level-0 { 1374 margin-left: 0; 1375 border-left: 2px solid var(--accent-color); 1376 padding-left: 0.75rem; 1377} 1378 1379.timeline-entry.conversation-post.level-1 { 1380 margin-left: 1.5rem; 1381 border-left: 2px solid var(--secondary-color); 1382 padding-left: 0.75rem; 1383} 1384 1385.timeline-entry.conversation-post.level-2 { 1386 margin-left: 3rem; 1387 border-left: 2px solid var(--text-secondary); 1388 padding-left: 0.75rem; 1389} 1390 
1391.timeline-entry.conversation-post.level-3 { 1392 margin-left: 4.5rem; 1393 border-left: 2px solid var(--text-secondary); 1394 padding-left: 0.75rem; 1395} 1396 1397.timeline-entry.conversation-post.level-4 { 1398 margin-left: 6rem; 1399 border-left: 2px solid var(--text-secondary); 1400 padding-left: 0.75rem; 1401} 1402 1403/* Cross-thread linking */ 1404.cross-thread-links { 1405 margin-top: 0.5rem; 1406 padding-top: 0.5rem; 1407 border-top: 1px solid var(--border-color); 1408} 1409 1410.cross-thread-indicator { 1411 font-size: 0.75rem; 1412 color: var(--text-secondary); 1413 background-color: var(--surface); 1414 padding: 0.25rem 0.5rem; 1415 border-radius: 12px; 1416 border: 1px solid var(--border-color); 1417 display: inline-block; 1418} 1419 1420/* Inline shared references styling */ 1421.inline-shared-refs { 1422 margin-left: 0.5rem; 1423 font-size: 0.85rem; 1424 color: var(--text-secondary); 1425} 1426 1427.shared-ref-link { 1428 color: var(--primary-color); 1429 text-decoration: none; 1430 font-weight: 500; 1431 transition: color 0.2s ease; 1432} 1433 1434.shared-ref-link:hover { 1435 color: var(--secondary-color); 1436 text-decoration: underline; 1437} 1438 1439.shared-ref-more { 1440 font-style: italic; 1441 color: var(--text-secondary); 1442 font-size: 0.8rem; 1443 margin-left: 0.25rem; 1444} 1445 1446.user-anchor, .post-anchor { 1447 position: absolute; 1448 margin-top: -60px; /* Offset for fixed header */ 1449 pointer-events: none; 1450} 1451 1452.cross-thread-link { 1453 color: var(--primary-color); 1454 text-decoration: none; 1455 font-weight: 500; 1456 transition: color 0.2s ease; 1457} 1458 1459.cross-thread-link:hover { 1460 color: var(--secondary-color); 1461 text-decoration: underline; 1462} 1463 1464.reference-badges { 1465 display: flex; 1466 gap: 0.25rem; 1467 margin-left: 0.5rem; 1468 flex-wrap: wrap; 1469} 1470 1471.ref-badge { 1472 display: inline-block; 1473 padding: 0.1rem 0.4rem; 1474 border-radius: 12px; 1475 font-size: 0.7rem; 
1476 font-weight: 600; 1477 text-transform: uppercase; 1478 letter-spacing: 0.05em; 1479} 1480 1481.ref-badge.ref-outbound { 1482 background-color: #e8f5e8; 1483 color: #2d8f2d; 1484 border: 1px solid #c3e6c3; 1485} 1486 1487.ref-badge.ref-inbound { 1488 background-color: #e8f0ff; 1489 color: #1f5fbf; 1490 border: 1px solid #b3d9ff; 1491} 1492 1493/* Author Color Coding */ 1494.timeline-author { 1495 position: relative; 1496} 1497 1498.timeline-author::before { 1499 content: ''; 1500 display: inline-block; 1501 width: 8px; 1502 height: 8px; 1503 border-radius: 50%; 1504 margin-right: 0.5rem; 1505 background-color: var(--secondary-color); 1506} 1507 1508/* Generate consistent colors for authors */ 1509.author-avsm::before { background-color: #e74c3c; } 1510.author-mort::before { background-color: #3498db; } 1511.author-mte::before { background-color: #2ecc71; } 1512.author-ryan::before { background-color: #f39c12; } 1513.author-mwd::before { background-color: #9b59b6; } 1514.author-dra::before { background-color: #1abc9c; } 1515.author-pf341::before { background-color: #34495e; } 1516.author-sadiqj::before { background-color: #e67e22; } 1517.author-martinkl::before { background-color: #8e44ad; } 1518.author-jonsterling::before { background-color: #27ae60; } 1519.author-jon::before { background-color: #f1c40f; } 1520.author-onkar::before { background-color: #e91e63; } 1521.author-gabriel::before { background-color: #00bcd4; } 1522.author-jess::before { background-color: #ff5722; } 1523.author-ibrahim::before { background-color: #607d8b; } 1524.author-andres::before { background-color: #795548; } 1525.author-eeg::before { background-color: #ff9800; } 1526 1527/* Section Headers */ 1528.conversations-section h3, 1529.referenced-posts-section h3, 1530.individual-posts-section h3 { 1531 border-bottom: 2px solid var(--border-color); 1532 padding-bottom: 0.5rem; 1533 margin-bottom: 1.5rem; 1534 position: relative; 1535} 1536 1537.conversations-section h3::before { 1538 
content: "💬"; 1539 margin-right: 0.5rem; 1540} 1541 1542.referenced-posts-section h3::before { 1543 content: "🔗"; 1544 margin-right: 0.5rem; 1545} 1546 1547.individual-posts-section h3::before { 1548 content: "📝"; 1549 margin-right: 0.5rem; 1550} 1551 1552/* Legacy thread styles (for backward compatibility) */ 1553.thread { 1554 background-color: var(--background); 1555 border: 1px solid var(--border-color); 1556 padding: 0; 1557 overflow: hidden; 1558 margin-bottom: 1rem; 1559} 1560 1561.thread-header { 1562 background-color: var(--surface); 1563 padding: 0.5rem 0.75rem; 1564 border-bottom: 1px solid var(--border-color); 1565} 1566 1567.thread-count { 1568 font-weight: 600; 1569 color: var(--secondary-color); 1570} 1571 1572.thread-entry { 1573 padding: 0.5rem 0.75rem; 1574 border-bottom: 1px solid var(--border-color); 1575} 1576 1577.thread-entry:last-child { 1578 border-bottom: none; 1579} 1580 1581.thread-entry.reply { 1582 margin-left: var(--thread-indent); 1583 border-left: 3px solid var(--secondary-color); 1584 background-color: var(--surface); 1585} 1586 1587/* Links Section */ 1588.link-group { 1589 background-color: var(--background); 1590} 1591 1592.link-url { 1593 font-size: 1rem; 1594 word-break: break-word; 1595} 1596 1597.link-url a { 1598 color: var(--secondary-color); 1599 text-decoration: none; 1600} 1601 1602.link-url a:hover { 1603 text-decoration: underline; 1604} 1605 1606.target-user { 1607 font-size: 0.9rem; 1608 color: var(--text-secondary); 1609 font-weight: normal; 1610} 1611 1612.referencing-entries { 1613 margin-top: 0.75rem; 1614} 1615 1616.ref-count { 1617 font-weight: 600; 1618 color: var(--text-secondary); 1619 font-size: 0.9rem; 1620} 1621 1622.referencing-entries ul { 1623 list-style: none; 1624 margin-top: 0.5rem; 1625 padding-left: 1rem; 1626} 1627 1628.referencing-entries li { 1629 margin-bottom: 0.25rem; 1630 font-size: 0.9rem; 1631} 1632 1633.referencing-entries .more { 1634 font-style: italic; 1635 color: 
var(--text-secondary); 1636} 1637 1638/* Users Section */ 1639.user-card { 1640 background-color: var(--background); 1641} 1642 1643.user-header { 1644 display: flex; 1645 gap: 1rem; 1646 align-items: start; 1647 margin-bottom: 1rem; 1648} 1649 1650.user-icon { 1651 width: 48px; 1652 height: 48px; 1653 border-radius: 50%; 1654 object-fit: cover; 1655} 1656 1657.user-info h3 { 1658 margin-bottom: 0.25rem; 1659} 1660 1661.username { 1662 font-size: 0.9rem; 1663 color: var(--text-secondary); 1664 font-weight: normal; 1665} 1666 1667.user-meta { 1668 font-size: 0.9rem; 1669 color: var(--text-secondary); 1670} 1671 1672.user-meta a { 1673 color: var(--secondary-color); 1674 text-decoration: none; 1675} 1676 1677.user-meta a:hover { 1678 text-decoration: underline; 1679} 1680 1681.separator { 1682 margin: 0 0.5rem; 1683} 1684 1685.post-count { 1686 font-weight: 600; 1687} 1688 1689.user-recent h4 { 1690 font-size: 0.95rem; 1691 margin-bottom: 0.5rem; 1692 color: var(--text-secondary); 1693} 1694 1695.user-recent ul { 1696 list-style: none; 1697 padding-left: 0; 1698} 1699 1700.user-recent li { 1701 margin-bottom: 0.25rem; 1702 font-size: 0.9rem; 1703} 1704 1705/* Footer */ 1706.site-footer { 1707 max-width: var(--max-width); 1708 margin: 3rem auto 2rem; 1709 padding: 1rem 2rem; 1710 text-align: center; 1711 color: var(--text-secondary); 1712 font-size: 0.85rem; 1713 border-top: 1px solid var(--border-color); 1714} 1715 1716.site-footer a { 1717 color: var(--secondary-color); 1718 text-decoration: none; 1719} 1720 1721.site-footer a:hover { 1722 text-decoration: underline; 1723} 1724 1725/* Responsive */ 1726@media (max-width: 768px) { 1727 .site-title { 1728 font-size: 1.3rem; 1729 } 1730 1731 .header-content { 1732 flex-direction: column; 1733 gap: 0.75rem; 1734 align-items: flex-start; 1735 } 1736 1737 .site-nav { 1738 gap: 1rem; 1739 } 1740 1741 .main-content { 1742 padding: 0 1rem; 1743 } 1744 1745 .thread-entry.reply { 1746 margin-left: calc(var(--thread-indent) / 
2); 1747 } 1748 1749 .user-header { 1750 flex-direction: column; 1751 } 1752} 1753</file> 1754 1755<file path="src/thicket/templates/timeline.html"> 1756{% extends "base.html" %} 1757 1758{% block page_title %}Timeline - {{ title }}{% endblock %} 1759 1760{% block content %} 1761{% set seen_users = [] %} 1762<div class="page-content"> 1763 <h2>Recent Posts & Conversations</h2> 1764 1765 <section class="unified-timeline"> 1766 {% for item in timeline_items %} 1767 {% if item.type == "post" %} 1768 <!-- Individual Post --> 1769 <article class="timeline-entry {% if item.content.references %}with-references{% endif %}"> 1770 <div class="timeline-meta"> 1771 <time datetime="{{ item.content.entry.updated or item.content.entry.published }}" class="timeline-time"> 1772 {{ (item.content.entry.updated or item.content.entry.published).strftime('%Y-%m-%d %H:%M') }} 1773 </time> 1774 {% set homepage = get_user_homepage(item.content.username) %} 1775 {% if item.content.username not in seen_users %} 1776 <a id="{{ item.content.username }}" class="user-anchor"></a> 1777 {% set _ = seen_users.append(item.content.username) %} 1778 {% endif %} 1779 <a id="post-{{ loop.index0 }}-{{ safe_anchor_id(item.content.entry.id) }}" class="post-anchor"></a> 1780 {% if homepage %} 1781 <a href="{{ homepage }}" target="_blank" class="timeline-author">{{ item.content.display_name }}</a> 1782 {% else %} 1783 <span class="timeline-author">{{ item.content.display_name }}</span> 1784 {% endif %} 1785 {% if item.content.references %} 1786 <div class="reference-badges"> 1787 {% for ref in item.content.references %} 1788 {% if ref.type == 'outbound' %} 1789 <span class="ref-badge ref-outbound" title="References {{ ref.target_username or 'external post' }}"> 1790 → {{ ref.target_username or 'ext' }} 1791 </span> 1792 {% elif ref.type == 'inbound' %} 1793 <span class="ref-badge ref-inbound" title="Referenced by {{ ref.source_username or 'external post' }}"> 1794 ← {{ ref.source_username or 'ext' }} 1795 
</span> 1796 {% endif %} 1797 {% endfor %} 1798 </div> 1799 {% endif %} 1800 </div> 1801 <div class="timeline-content"> 1802 <strong class="timeline-title"> 1803 <a href="{{ item.content.entry.link }}" target="_blank">{{ item.content.entry.title }}</a> 1804 </strong> 1805 {% if item.content.entry.summary %} 1806 <span class="timeline-summary">— {{ clean_html_summary(item.content.entry.summary, 250) }}</span> 1807 {% endif %} 1808 {% if item.content.shared_references %} 1809 <span class="inline-shared-refs"> 1810 {% for ref in item.content.shared_references[:3] %} 1811 {% if ref.target_username %} 1812 <a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %} 1813 {% endif %} 1814 {% endfor %} 1815 {% if item.content.shared_references|length > 3 %} 1816 <span class="shared-ref-more">+{{ item.content.shared_references|length - 3 }} more</span> 1817 {% endif %} 1818 </span> 1819 {% endif %} 1820 {% if item.content.cross_thread_links %} 1821 <div class="cross-thread-links"> 1822 <span class="cross-thread-indicator">🔗 Also appears: </span> 1823 {% for link in item.content.cross_thread_links %} 1824 <a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %} 1825 {% endfor %} 1826 </div> 1827 {% endif %} 1828 </div> 1829 </article> 1830 1831 {% elif item.type == "thread" %} 1832 <!-- Conversation Thread --> 1833 {% set outer_loop_index = loop.index0 %} 1834 {% for thread_item in item.content %} 1835 <article class="timeline-entry conversation-post level-{{ thread_item.thread_level }}"> 1836 <div class="timeline-meta"> 1837 <time datetime="{{ thread_item.entry.updated or thread_item.entry.published }}" class="timeline-time"> 1838 {{ (thread_item.entry.updated or thread_item.entry.published).strftime('%Y-%m-%d %H:%M') }} 1839 </time> 1840 {% set homepage = 
get_user_homepage(thread_item.username) %} 1841 {% if thread_item.username not in seen_users %} 1842 <a id="{{ thread_item.username }}" class="user-anchor"></a> 1843 {% set _ = seen_users.append(thread_item.username) %} 1844 {% endif %} 1845 <a id="post-{{ outer_loop_index }}-{{ loop.index0 }}-{{ safe_anchor_id(thread_item.entry.id) }}" class="post-anchor"></a> 1846 {% if homepage %} 1847 <a href="{{ homepage }}" target="_blank" class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</a> 1848 {% else %} 1849 <span class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</span> 1850 {% endif %} 1851 {% if thread_item.references_to or thread_item.referenced_by %} 1852 <span class="reference-indicators"> 1853 {% if thread_item.references_to %} 1854 <span class="ref-out" title="References other posts"></span> 1855 {% endif %} 1856 {% if thread_item.referenced_by %} 1857 <span class="ref-in" title="Referenced by other posts"></span> 1858 {% endif %} 1859 </span> 1860 {% endif %} 1861 </div> 1862 <div class="timeline-content"> 1863 <strong class="timeline-title"> 1864 <a href="{{ thread_item.entry.link }}" target="_blank">{{ thread_item.entry.title }}</a> 1865 </strong> 1866 {% if thread_item.entry.summary %} 1867 <span class="timeline-summary">— {{ clean_html_summary(thread_item.entry.summary, 300) }}</span> 1868 {% endif %} 1869 {% if thread_item.shared_references %} 1870 <span class="inline-shared-refs"> 1871 {% for ref in thread_item.shared_references[:3] %} 1872 {% if ref.target_username %} 1873 <a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %} 1874 {% endif %} 1875 {% endfor %} 1876 {% if thread_item.shared_references|length > 3 %} 1877 <span class="shared-ref-more">+{{ thread_item.shared_references|length - 3 }} more</span> 1878 {% endif %} 1879 </span> 1880 {% endif %} 1881 {% 
if thread_item.cross_thread_links %} 1882 <div class="cross-thread-links"> 1883 <span class="cross-thread-indicator">🔗 Also appears: </span> 1884 {% for link in thread_item.cross_thread_links %} 1885 <a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %} 1886 {% endfor %} 1887 </div> 1888 {% endif %} 1889 </div> 1890 </article> 1891 {% endfor %} 1892 {% endif %} 1893 {% endfor %} 1894 </section> 1895</div> 1896{% endblock %} 1897</file> 1898 1899<file path="src/thicket/templates/users.html"> 1900{% extends "base.html" %} 1901 1902{% block page_title %}Users - {{ title }}{% endblock %} 1903 1904{% block content %} 1905<div class="page-content"> 1906 <h2>Users</h2> 1907 <p class="page-description">All users contributing to this thicket, ordered by post count.</p> 1908 1909 {% for user_info in users %} 1910 <article class="user-card"> 1911 <div class="user-header"> 1912 {% if user_info.metadata.icon and user_info.metadata.icon != "None" %} 1913 <img src="{{ user_info.metadata.icon }}" alt="{{ user_info.metadata.username }}" class="user-icon"> 1914 {% endif %} 1915 <div class="user-info"> 1916 <h3> 1917 {% if user_info.metadata.display_name %} 1918 {{ user_info.metadata.display_name }} 1919 <span class="username">({{ user_info.metadata.username }})</span> 1920 {% else %} 1921 {{ user_info.metadata.username }} 1922 {% endif %} 1923 </h3> 1924 <div class="user-meta"> 1925 {% if user_info.metadata.homepage %} 1926 <a href="{{ user_info.metadata.homepage }}" target="_blank">{{ user_info.metadata.homepage }}</a> 1927 {% endif %} 1928 {% if user_info.metadata.email %} 1929 <span class="separator"></span> 1930 <a href="mailto:{{ user_info.metadata.email }}">{{ user_info.metadata.email }}</a> 1931 {% endif %} 1932 <span class="separator"></span> 1933 <span class="post-count">{{ user_info.metadata.entry_count }} posts</span> 1934 </div> 1935 </div> 1936 </div> 1937 1938 {% if 
user_info.recent_entries %} 1939 <div class="user-recent"> 1940 <h4>Recent posts:</h4> 1941 <ul> 1942 {% for display_name, entry in user_info.recent_entries %} 1943 <li> 1944 <a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a> 1945 <time datetime="{{ entry.updated or entry.published }}"> 1946 ({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }}) 1947 </time> 1948 </li> 1949 {% endfor %} 1950 </ul> 1951 </div> 1952 {% endif %} 1953 </article> 1954 {% endfor %} 1955</div> 1956{% endblock %} 1957</file> 1958 1959<file path="README.md"> 1960# Thicket 1961 1962A modern CLI tool for persisting Atom/RSS feeds in Git repositories, designed to enable distributed webblog comment structures. 1963 1964## Features 1965 1966- **Feed Auto-Discovery**: Automatically extracts user metadata from Atom/RSS feeds 1967- **Git Storage**: Stores feed entries in a Git repository with full history 1968- **Duplicate Management**: Manual curation of duplicate entries across feeds 1969- **Modern CLI**: Built with Typer and Rich for beautiful terminal output 1970- **Comprehensive Parsing**: Supports RSS 0.9x, RSS 1.0, RSS 2.0, and Atom feeds 1971- **Cron-Friendly**: Designed for scheduled execution 1972 1973## Installation 1974 1975```bash 1976# Install from source 1977pip install -e . 1978 1979# Or install with dev dependencies 1980pip install -e .[dev] 1981``` 1982 1983## Quick Start 1984 19851. **Initialize a new thicket repository:** 1986```bash 1987thicket init ./my-feeds 1988``` 1989 19902. **Add a user with their feed:** 1991```bash 1992thicket add user "alice" --feed "https://alice.example.com/feed.xml" 1993``` 1994 19953. **Sync feeds to download entries:** 1996```bash 1997thicket sync --all 1998``` 1999 20004. 
**List users and feeds:** 2001```bash 2002thicket list users 2003thicket list feeds 2004thicket list entries 2005``` 2006 2007## Commands 2008 2009### Initialize 2010```bash 2011thicket init <git-store-path> [--cache-dir <path>] [--config <config-file>] 2012``` 2013 2014### Add Users and Feeds 2015```bash 2016# Add user with auto-discovery 2017thicket add user "username" --feed "https://example.com/feed.xml" 2018 2019# Add user with manual metadata 2020thicket add user "username" \ 2021 --feed "https://example.com/feed.xml" \ 2022 --email "user@example.com" \ 2023 --homepage "https://example.com" \ 2024 --display-name "User Name" 2025 2026# Add additional feed to existing user 2027thicket add feed "username" "https://example.com/other-feed.xml" 2028``` 2029 2030### Sync Feeds 2031```bash 2032# Sync all users 2033thicket sync --all 2034 2035# Sync specific user 2036thicket sync --user "username" 2037 2038# Dry run (preview changes) 2039thicket sync --all --dry-run 2040``` 2041 2042### List Information 2043```bash 2044# List all users 2045thicket list users 2046 2047# List all feeds 2048thicket list feeds 2049 2050# List feeds for specific user 2051thicket list feeds --user "username" 2052 2053# List recent entries 2054thicket list entries --limit 20 2055 2056# List entries for specific user 2057thicket list entries --user "username" 2058``` 2059 2060### Manage Duplicates 2061```bash 2062# List duplicate mappings 2063thicket duplicates list 2064 2065# Mark entries as duplicates 2066thicket duplicates add "https://example.com/dup" "https://example.com/canonical" 2067 2068# Remove duplicate mapping 2069thicket duplicates remove "https://example.com/dup" 2070``` 2071 2072## Configuration 2073 2074Thicket uses a YAML configuration file (default: `thicket.yaml`): 2075 2076```yaml 2077git_store: ./feeds-repo 2078cache_dir: ~/.cache/thicket 2079users: 2080 - username: alice 2081 feeds: 2082 - https://alice.example.com/feed.xml 2083 email: alice@example.com 2084 homepage: 
https://alice.example.com 2085 display_name: Alice 2086``` 2087 2088## Git Repository Structure 2089 2090``` 2091feeds-repo/ 2092├── index.json # User directory index 2093├── duplicates.json # Duplicate entry mappings 2094├── alice/ 2095│ ├── metadata.json # User metadata 2096│ ├── entry_id_1.json # Feed entries 2097│ └── entry_id_2.json 2098└── bob/ 2099 └── ... 2100``` 2101 2102## Development 2103 2104### Setup 2105```bash 2106# Install in development mode 2107pip install -e .[dev] 2108 2109# Run tests 2110pytest 2111 2112# Run linting 2113ruff check src/ 2114black --check src/ 2115 2116# Run type checking 2117mypy src/ 2118``` 2119 2120### Architecture 2121 2122- **CLI**: Modern interface with Typer and Rich 2123- **Feed Processing**: Universal parsing with feedparser 2124- **Git Storage**: Structured storage with GitPython 2125- **Data Models**: Pydantic for validation and serialization 2126- **Async HTTP**: httpx for efficient feed fetching 2127 2128## Use Cases 2129 2130- **Blog Aggregation**: Collect and archive blog posts from multiple sources 2131- **Comment Networks**: Enable distributed commenting systems 2132- **Feed Archival**: Preserve feed history beyond typical feed depth limits 2133- **Content Curation**: Manage and deduplicate content across feeds 2134 2135## License 2136 2137MIT License - see LICENSE file for details. 
"""CLI command for building reference index from blog entries."""

import json
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

import typer
from rich.console import Console
from rich.progress import (
    BarColumn,
    Progress,
    SpinnerColumn,
    TaskProgressColumn,
    TextColumn,
)
from rich.table import Table

from ...core.git_store import GitStore
from ...core.reference_parser import ReferenceIndex, ReferenceParser
from ..main import app
from ..utils import get_tsv_mode, load_config

console = Console()


@app.command()
def index(
    config_file: Optional[Path] = typer.Option(
        None,
        "--config",
        "-c",
        help="Path to configuration file",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output",
        "-o",
        help="Path to output index file (default: updates links.json in git store)",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Show detailed progress information",
    ),
) -> None:
    """Build a reference index showing which blog entries reference others.

    This command analyzes all blog entries to detect cross-references between
    different blogs, creating an index that can be used to build threaded
    views of related content.

    Updates the unified links.json file with reference data.
    """
    try:
        # Load configuration
        config = load_config(config_file)

        # Initialize Git store
        git_store = GitStore(config.git_store)

        # Initialize reference parser
        parser = ReferenceParser()

        # Build user domain mapping (username -> set of domains that user posts on)
        if verbose:
            console.print("Building user domain mapping...")
        user_domains = parser.build_user_domain_mapping(git_store)

        if verbose:
            console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")

        # Initialize reference index
        ref_index = ReferenceIndex()
        ref_index.user_domains = user_domains

        # Get all users (store_index: avoid shadowing this command's own name)
        store_index = git_store._load_index()
        users = list(store_index.users.keys())

        if not users:
            console.print("[yellow]No users found in Git store[/yellow]")
            # Propagates cleanly: typer.Exit is re-raised below, not swallowed.
            raise typer.Exit(0)

        # Process all entries
        total_entries = 0
        total_references = 0
        all_references = []

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            console=console,
        ) as progress:

            # Count total entries first so the extraction bar has a real total
            counting_task = progress.add_task("Counting entries...", total=len(users))
            for username in users:
                total_entries += len(git_store.list_entries(username))
                progress.advance(counting_task)

            progress.remove_task(counting_task)

            # Process entries - extract references
            processing_task = progress.add_task(
                f"Extracting references from {total_entries} entries...",
                total=total_entries
            )

            for username in users:
                entries = git_store.list_entries(username)

                for entry in entries:
                    # Extract references from this entry
                    references = parser.extract_references(entry, username, user_domains)

                    all_references.extend(references)

                    progress.advance(processing_task)

                    if verbose and references:
                        console.print(f"  Found {len(references)} references in {username}:{entry.title[:50]}...")

            progress.remove_task(processing_task)

            # Resolve target_entry_ids for references
            if all_references:
                resolve_task = progress.add_task(
                    f"Resolving {len(all_references)} references...",
                    total=len(all_references)
                )

                if verbose:
                    console.print(f"Resolving target entry IDs for {len(all_references)} references...")

                resolved_references = parser.resolve_target_entry_ids(all_references, git_store)

                # Count resolved references
                resolved_count = sum(1 for ref in resolved_references if ref.target_entry_id is not None)
                if verbose:
                    console.print(f"Resolved {resolved_count} out of {len(all_references)} references")

                # Add resolved references to index
                for ref in resolved_references:
                    ref_index.add_reference(ref)
                    total_references += 1
                    progress.advance(resolve_task)

                progress.remove_task(resolve_task)

        # Determine output path
        if output_file:
            output_path = output_file
        else:
            output_path = config.git_store / "links.json"

        # Load existing links data or create new structure.
        # An explicit --output always starts from a fresh structure.
        if output_path.exists() and not output_file:
            # Load existing unified structure
            with open(output_path) as f:
                existing_data = json.load(f)
        else:
            # Create new structure
            existing_data = {
                "links": {},
                "reverse_mapping": {},
                "user_domains": {}
            }

        # Update with reference data
        existing_data["references"] = ref_index.to_dict()["references"]
        existing_data["user_domains"] = {k: list(v) for k, v in user_domains.items()}

        # Save updated structure
        with open(output_path, "w") as f:
            json.dump(existing_data, f, indent=2, default=str)

        # Show summary
        if not get_tsv_mode():
            console.print("\n[green]✓ Reference index built successfully[/green]")

        # Create summary table or TSV output
        if get_tsv_mode():
            print("Metric\tCount")
            print(f"Total Users\t{len(users)}")
            print(f"Total Entries\t{total_entries}")
            print(f"Total References\t{total_references}")
            print(f"Outbound Refs\t{len(ref_index.outbound_refs)}")
            print(f"Inbound Refs\t{len(ref_index.inbound_refs)}")
            print(f"Output File\t{output_path}")
        else:
            table = Table(title="Reference Index Summary")
            table.add_column("Metric", style="cyan")
            table.add_column("Count", style="green")

            table.add_row("Total Users", str(len(users)))
            table.add_row("Total Entries", str(total_entries))
            table.add_row("Total References", str(total_references))
            table.add_row("Outbound Refs", str(len(ref_index.outbound_refs)))
            table.add_row("Inbound Refs", str(len(ref_index.inbound_refs)))
            table.add_row("Output File", str(output_path))

            console.print(table)

        # Show some interesting statistics
        if total_references > 0:
            if not get_tsv_mode():
                console.print("\n[bold]Reference Statistics:[/bold]")

            # Most referenced users
            target_counts = {}
            unresolved_domains = set()

            for ref in ref_index.references:
                if ref.target_username:
                    target_counts[ref.target_username] = target_counts.get(ref.target_username, 0) + 1
                else:
                    # Track unresolved domains
                    domain = urlparse(ref.target_url).netloc.lower()
                    unresolved_domains.add(domain)

            if target_counts:
                if get_tsv_mode():
                    print("Referenced User\tReference Count")
                    for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
                        print(f"{username}\t{count}")
                else:
                    console.print("\nMost referenced users:")
                    for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
                        console.print(f"  {username}: {count} references")

            if unresolved_domains and verbose:
                # Sort first, then slice, so we show the first 10 alphabetically
                # rather than an arbitrary subset of 10.
                if get_tsv_mode():
                    print("Unresolved Domain\tCount")
                    for domain in sorted(unresolved_domains)[:10]:
                        print(f"{domain}\t1")
                    if len(unresolved_domains) > 10:
                        print(f"... and {len(unresolved_domains) - 10} more\t...")
                else:
                    console.print(f"\nUnresolved domains: {len(unresolved_domains)}")
                    for domain in sorted(unresolved_domains)[:10]:
                        console.print(f"  {domain}")
                    if len(unresolved_domains) > 10:
                        console.print(f"  ... and {len(unresolved_domains) - 10} more")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the broad
        # handler below would repackage an intentional exit as an error.
        raise
    except Exception as e:
        console.print(f"[red]Error building reference index: {e}[/red]")
        if verbose:
            console.print_exception()
        raise typer.Exit(1)


@app.command()
def threads(
    config_file: Optional[Path] = typer.Option(
        None,
        "--config",
        "-c",
        help="Path to configuration file",
    ),
    index_file: Optional[Path] = typer.Option(
        None,
        "--index",
        "-i",
        help="Path to reference index file (default: links.json in git store)",
    ),
    username: Optional[str] = typer.Option(
        None,
        "--username",
        "-u",
        help="Show threads for specific username only",
    ),
    entry_id: Optional[str] = typer.Option(
        None,
        "--entry",
        "-e",
        help="Show thread for specific entry ID",
    ),
    min_size: int = typer.Option(
        2,
        "--min-size",
        "-m",
        help="Minimum thread size to display",
    ),
) -> None:
    """Show threaded view of related blog entries.

    This command uses the reference index to show which blog entries
    are connected through cross-references, creating an email-style
    threaded view of the conversation.

    Reads reference data from the unified links.json file.
    """
    try:
        # Load configuration
        config = load_config(config_file)

        # Determine index file path
        if index_file:
            index_path = index_file
        else:
            index_path = config.git_store / "links.json"

        if not index_path.exists():
            console.print(f"[red]Links file not found: {index_path}[/red]")
            console.print("Run 'thicket links' and 'thicket index' first to build the reference index")
            raise typer.Exit(1)

        # Load unified data
        with open(index_path) as f:
            unified_data = json.load(f)

        # Check if references exist in the unified structure
        if "references" not in unified_data:
            console.print(f"[red]No references found in {index_path}[/red]")
            console.print("Run 'thicket index' first to build the reference index")
            raise typer.Exit(1)

        # Extract reference data and reconstruct ReferenceIndex
        ref_index = ReferenceIndex.from_dict({
            "references": unified_data["references"],
            "user_domains": unified_data.get("user_domains", {})
        })

        # Initialize Git store to get entry details
        git_store = GitStore(config.git_store)

        if entry_id and username:
            # Show specific thread
            thread_members = ref_index.get_thread_members(username, entry_id)
            _display_thread(thread_members, ref_index, git_store, f"Thread for {username}:{entry_id}")

        elif username:
            # Show all threads involving this user
            user_index = git_store._load_index()
            user = user_index.get_user(username)
            if not user:
                console.print(f"[red]User not found: {username}[/red]")
                raise typer.Exit(1)

            entries = git_store.list_entries(username)
            threads_found = set()

            console.print(f"[bold]Threads involving {username}:[/bold]\n")

            for entry in entries:
                thread_members = ref_index.get_thread_members(username, entry.id)
                if len(thread_members) >= min_size:
                    # Deduplicate threads by their sorted member tuple
                    thread_key = tuple(sorted(thread_members))
                    if thread_key not in threads_found:
                        threads_found.add(thread_key)
                        _display_thread(thread_members, ref_index, git_store, f"Thread #{len(threads_found)}")

        else:
            # Show all threads
            console.print("[bold]All conversation threads:[/bold]\n")

            all_threads = set()
            processed_entries = set()

            # Get all entries ("owner" avoids shadowing the --username option)
            user_index = git_store._load_index()
            for owner in user_index.users.keys():
                entries = git_store.list_entries(owner)
                for entry in entries:
                    entry_key = (owner, entry.id)
                    if entry_key in processed_entries:
                        continue

                    thread_members = ref_index.get_thread_members(owner, entry.id)
                    if len(thread_members) >= min_size:
                        thread_key = tuple(sorted(thread_members))
                        if thread_key not in all_threads:
                            all_threads.add(thread_key)
                            _display_thread(thread_members, ref_index, git_store, f"Thread #{len(all_threads)}")

                    # Mark all members as processed
                    for member in thread_members:
                        processed_entries.add(member)

            if not all_threads:
                console.print("[yellow]No conversation threads found[/yellow]")
                console.print(f"(minimum thread size: {min_size})")

    except typer.Exit:
        # Let intentional exits (including "not found" above) pass through.
        raise
    except Exception as e:
        console.print(f"[red]Error showing threads: {e}[/red]")
        raise typer.Exit(1)


def _display_thread(thread_members, ref_index, git_store, title):
    """Display a single conversation thread, oldest entry first."""
    console.print(f"[bold cyan]{title}[/bold cyan]")
    console.print(f"Thread size: {len(thread_members)} entries")

    # Get entry details for each member (skip members we can't load)
    thread_entries = []
    for username, entry_id in thread_members:
        entry = git_store.get_entry(username, entry_id)
        if entry:
            thread_entries.append((username, entry))

    # Sort by publication date, falling back to the updated timestamp
    thread_entries.sort(key=lambda x: x[1].published or x[1].updated)

    # Display entries
    for i, (username, entry) in enumerate(thread_entries):
        # Tree connector: "└─" (not "") for the last entry keeps alignment.
        prefix = "├─" if i < len(thread_entries) - 1 else "└─"

        # Get references for this entry
        outbound = ref_index.get_outbound_refs(username, entry.id)
        inbound = ref_index.get_inbound_refs(username, entry.id)

        ref_info = ""
        if outbound or inbound:
            ref_info = f" ({len(outbound)} out, {len(inbound)} in)"

        console.print(f"  {prefix} [{username}] {entry.title[:60]}...{ref_info}")

        if entry.published:
            console.print(f"      Published: {entry.published.strftime('%Y-%m-%d')}")

    console.print()  # Empty line after each thread
@app.command()
def info(
    identifier: str = typer.Argument(
        ...,
        help="The atom ID or URL of the entry to display information about"
    ),
    username: Optional[str] = typer.Option(
        None,
        "--username",
        "-u",
        help="Username to search for the entry (if not provided, searches all users)"
    ),
    config_file: Optional[Path] = typer.Option(
        Path("thicket.yaml"),
        "--config",
        "-c",
        help="Path to configuration file",
    ),
    show_content: bool = typer.Option(
        False,
        "--content",
        help="Include the full content of the entry in the output"
    ),
) -> None:
    """Display detailed information about a specific atom entry.

    You can specify the entry using either its atom ID or URL.
    Shows all metadata for the given entry, including title, dates, categories,
    and summarizes all inbound and outbound links to/from other posts.
    """
    try:
        # Load configuration
        config = load_config(config_file)

        # Initialize Git store
        git_store = GitStore(config.git_store)

        # Find the entry
        entry = None
        found_username = None

        # Check if identifier looks like a URL
        is_url = identifier.startswith(('http://', 'https://'))

        if username:
            # Search specific username
            if is_url:
                # Search by URL (linear scan over the user's entries)
                entries = git_store.list_entries(username)
                for e in entries:
                    if str(e.link) == identifier:
                        entry = e
                        found_username = username
                        break
            else:
                # Search by atom ID
                entry = git_store.get_entry(username, identifier)
                if entry:
                    found_username = username
        else:
            # Search all users
            index = git_store._load_index()
            for user in index.users.keys():
                if is_url:
                    # Search by URL
                    entries = git_store.list_entries(user)
                    for e in entries:
                        if str(e.link) == identifier:
                            entry = e
                            found_username = user
                            break
                    if entry:
                        break
                else:
                    # Search by atom ID
                    entry = git_store.get_entry(user, identifier)
                    if entry:
                        found_username = user
                        break

        if not entry or not found_username:
            if username:
                console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found for user '{username}'[/red]")
            else:
                console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found in any user's entries[/red]")
            raise typer.Exit(1)

        # Load reference index if available
        links_path = config.git_store / "links.json"
        ref_index = None
        if links_path.exists():
            with open(links_path) as f:
                unified_data = json.load(f)

            # Check if references exist in the unified structure
            if "references" in unified_data:
                ref_index = ReferenceIndex.from_dict({
                    "references": unified_data["references"],
                    "user_domains": unified_data.get("user_domains", {})
                })

        # Display information
        if get_tsv_mode():
            _display_entry_info_tsv(entry, found_username, ref_index, show_content)
        else:
            _display_entry_info(entry, found_username)

            if ref_index:
                _display_link_info(entry, found_username, ref_index)
            else:
                console.print("\n[yellow]No reference index found. Run 'thicket links' and 'thicket index' to build cross-reference data.[/yellow]")

            # Optionally display content
            if show_content and entry.content:
                _display_content(entry.content)

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the
        # not-found exit above would be reported as a spurious error.
        raise
    except Exception as e:
        console.print(f"[red]Error displaying entry info: {e}[/red]")
        raise typer.Exit(1)


def _display_entry_info(entry, username: str) -> None:
    """Display basic entry information in a structured panel."""

    # Create main info panel
    info_table = Table.grid(padding=(0, 2))
    info_table.add_column("Field", style="cyan bold", width=15)
    info_table.add_column("Value", style="white")

    info_table.add_row("User", f"[green]{username}[/green]")
    info_table.add_row("Atom ID", f"[blue]{entry.id}[/blue]")
    info_table.add_row("Title", entry.title)
    info_table.add_row("Link", str(entry.link))

    if entry.published:
        info_table.add_row("Published", entry.published.strftime("%Y-%m-%d %H:%M:%S UTC"))

    info_table.add_row("Updated", entry.updated.strftime("%Y-%m-%d %H:%M:%S UTC"))

    if entry.summary:
        # Truncate long summaries
        summary = entry.summary[:200] + "..." if len(entry.summary) > 200 else entry.summary
        info_table.add_row("Summary", summary)

    if entry.categories:
        categories_text = ", ".join(entry.categories)
        info_table.add_row("Categories", categories_text)

    if entry.author:
        author_info = []
        if "name" in entry.author:
            author_info.append(entry.author["name"])
        if "email" in entry.author:
            author_info.append(f"<{entry.author['email']}>")
        if author_info:
            info_table.add_row("Author", " ".join(author_info))

    if entry.content_type:
        info_table.add_row("Content Type", entry.content_type)

    if entry.rights:
        info_table.add_row("Rights", entry.rights)

    if entry.source:
        info_table.add_row("Source Feed", entry.source)

    panel = Panel(
        info_table,
        title="[bold]Entry Information[/bold]",  # plain string; no placeholders needed
        border_style="blue"
    )

    console.print(panel)


def _display_link_info(entry, username: str, ref_index: ReferenceIndex) -> None:
    """Display inbound and outbound link information."""

    # Get links
    outbound_refs = ref_index.get_outbound_refs(username, entry.id)
    inbound_refs = ref_index.get_inbound_refs(username, entry.id)

    if not outbound_refs and not inbound_refs:
        console.print("\n[dim]No cross-references found for this entry.[/dim]")
        return

    # Create links table
    links_table = Table(title="Cross-References")
    links_table.add_column("Direction", style="cyan", width=10)
    links_table.add_column("Target/Source", style="green", width=20)
    links_table.add_column("URL", style="blue", width=50)

    # Add outbound references
    for ref in outbound_refs:
        target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
        links_table.add_row("→ Out", target_info, ref.target_url)

    # Add inbound references
    for ref in inbound_refs:
        source_info = f"{ref.source_username}:{ref.source_entry_id}"
        links_table.add_row("← In", source_info, ref.target_url)

    console.print()
    console.print(links_table)

    # Summary
    console.print(f"\n[bold]Summary:[/bold] {len(outbound_refs)} outbound, {len(inbound_refs)} inbound references")


def _display_content(content: str) -> None:
    """Display the full content of the entry."""

    # Truncate very long content
    display_content = content
    if len(content) > 5000:
        display_content = content[:5000] + "\n\n[... content truncated ...]"

    panel = Panel(
        display_content,
        title="[bold]Entry Content[/bold]",
        border_style="green",
        expand=False
    )

    console.print()
    console.print(panel)


def _display_entry_info_tsv(entry, username: str, ref_index: Optional[ReferenceIndex], show_content: bool) -> None:
    """Display entry information in TSV format (tabs/newlines escaped)."""

    # Basic info
    print("Field\tValue")
    print(f"User\t{username}")
    print(f"Atom ID\t{entry.id}")
    print(f"Title\t{entry.title.replace(chr(9), ' ').replace(chr(10), ' ').replace(chr(13), ' ')}")
    print(f"Link\t{entry.link}")

    if entry.published:
        print(f"Published\t{entry.published.strftime('%Y-%m-%d %H:%M:%S UTC')}")

    print(f"Updated\t{entry.updated.strftime('%Y-%m-%d %H:%M:%S UTC')}")

    if entry.summary:
        # Escape tabs and newlines in summary
        summary = entry.summary.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
        print(f"Summary\t{summary}")

    if entry.categories:
        print(f"Categories\t{', '.join(entry.categories)}")

    if entry.author:
        author_info = []
        if "name" in entry.author:
            author_info.append(entry.author["name"])
        if "email" in entry.author:
            author_info.append(f"<{entry.author['email']}>")
        if author_info:
            print(f"Author\t{' '.join(author_info)}")

    if entry.content_type:
        print(f"Content Type\t{entry.content_type}")

    if entry.rights:
        print(f"Rights\t{entry.rights}")

    if entry.source:
        print(f"Source Feed\t{entry.source}")

    # Add reference info if available
    if ref_index:
        outbound_refs = ref_index.get_outbound_refs(username, entry.id)
        inbound_refs = ref_index.get_inbound_refs(username, entry.id)

        print(f"Outbound References\t{len(outbound_refs)}")
        print(f"Inbound References\t{len(inbound_refs)}")

        # Show each reference
        for ref in outbound_refs:
            target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
            print(f"Outbound Reference\t{target_info}\t{ref.target_url}")

        for ref in inbound_refs:
            source_info = f"{ref.source_username}:{ref.source_entry_id}"
            print(f"Inbound Reference\t{source_info}\t{ref.target_url}")

    # Show content if requested
    if show_content and entry.content:
        # Escape tabs and newlines in content
        content = entry.content.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
        print(f"Content\t{content}")
force: bool = typer.Option( 2909 False, "--force", "-f", help="Overwrite existing configuration" 2910 ), 2911) -> None: 2912 """Initialize a new thicket configuration and Git store.""" 2913 2914 # Set default paths 2915 if cache_dir is None: 2916 from platformdirs import user_cache_dir 2917 cache_dir = Path(user_cache_dir("thicket")) 2918 2919 if config_file is None: 2920 config_file = Path("thicket.yaml") 2921 2922 # Check if config already exists 2923 if config_file.exists() and not force: 2924 print_error(f"Configuration file already exists: {config_file}") 2925 print_error("Use --force to overwrite") 2926 raise typer.Exit(1) 2927 2928 # Create cache directory 2929 cache_dir.mkdir(parents=True, exist_ok=True) 2930 2931 # Create Git store 2932 try: 2933 GitStore(git_store) 2934 print_success(f"Initialized Git store at: {git_store}") 2935 except Exception as e: 2936 print_error(f"Failed to initialize Git store: {e}") 2937 raise typer.Exit(1) from e 2938 2939 # Create configuration 2940 try: 2941 config = ThicketConfig( 2942 git_store=git_store, 2943 cache_dir=cache_dir, 2944 users=[] 2945 ) 2946 2947 save_config(config, config_file) 2948 print_success(f"Created configuration file: {config_file}") 2949 2950 except ValidationError as e: 2951 print_error(f"Invalid configuration: {e}") 2952 raise typer.Exit(1) from e 2953 except Exception as e: 2954 print_error(f"Failed to create configuration: {e}") 2955 raise typer.Exit(1) from e 2956 2957 print_success("Thicket initialized successfully!") 2958 print_success(f"Git store: {git_store}") 2959 print_success(f"Cache directory: {cache_dir}") 2960 print_success(f"Configuration: {config_file}") 2961 print_success("Run 'thicket add user' to add your first user and feed.") 2962</file> 2963 2964<file path="src/thicket/cli/__init__.py"> 2965"""CLI interface for thicket.""" 2966 2967from .main import app 2968 2969__all__ = ["app"] 2970</file> 2971 2972<file path="src/thicket/core/__init__.py"> 2973"""Core business logic for 
class FeedParser:
    """Parser for RSS/Atom feeds with normalization and auto-discovery.

    Fetches feeds over HTTP(S), normalizes RSS/Atom entries into
    ``AtomEntry`` objects, sanitizes embedded HTML with bleach, and
    extracts feed-level metadata for auto-discovery.
    """

    def __init__(self, user_agent: str = "thicket/0.1.0"):
        """Initialize the feed parser.

        Args:
            user_agent: User-Agent header sent with feed requests.
        """
        self.user_agent = user_agent
        # Whitelists passed to bleach.clean(); anything else is stripped.
        self.allowed_tags = [
            "a", "abbr", "acronym", "b", "blockquote", "br", "code", "em",
            "i", "li", "ol", "p", "pre", "strong", "ul", "h1", "h2", "h3",
            "h4", "h5", "h6", "img", "div", "span",
        ]
        self.allowed_attributes = {
            "a": ["href", "title"],
            "abbr": ["title"],
            "acronym": ["title"],
            "img": ["src", "alt", "title", "width", "height"],
            "blockquote": ["cite"],
        }

    async def fetch_feed(self, url: HttpUrl) -> str:
        """Fetch feed content from URL.

        Follows redirects with a 30s timeout; raises httpx.HTTPStatusError
        on non-2xx responses.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(
                str(url),
                headers={"User-Agent": self.user_agent},
                timeout=30.0,
                follow_redirects=True,
            )
            response.raise_for_status()
            return response.text

    def parse_feed(self, content: str, source_url: Optional[HttpUrl] = None) -> tuple[FeedMetadata, list[AtomEntry]]:
        """Parse feed content and return metadata and entries.

        Malformed ("bozo") feeds are processed best-effort; entries that
        fail normalization are skipped with a diagnostic message.
        """
        parsed = feedparser.parse(content)

        if parsed.bozo and parsed.bozo_exception:
            # Try to continue with potentially malformed feed
            pass

        # Extract feed metadata
        feed_meta = self._extract_feed_metadata(parsed.feed)

        # Extract and normalize entries
        entries = []
        for entry in parsed.entries:
            try:
                atom_entry = self._normalize_entry(entry, source_url)
                entries.append(atom_entry)
            except Exception as e:
                # Log error but continue processing other entries
                print(f"Error processing entry {getattr(entry, 'id', 'unknown')}: {e}")
                continue

        return feed_meta, entries

    def _extract_feed_metadata(self, feed: feedparser.FeedParserDict) -> FeedMetadata:
        """Extract metadata from feed for auto-discovery.

        All URL fields are validated defensively: a malformed URL in the
        feed yields None for that field instead of raising.
        """
        # Parse author information
        author_name = None
        author_email = None
        author_uri = None

        if hasattr(feed, 'author_detail'):
            author_name = feed.author_detail.get('name')
            author_email = feed.author_detail.get('email')
            author_uri = feed.author_detail.get('href')
        elif hasattr(feed, 'author'):
            author_name = feed.author

        # Parse managing editor for RSS feeds
        if not author_email and hasattr(feed, 'managingEditor'):
            author_email = feed.managingEditor

        # Validate the author URI like every other URL here: a bad value
        # must not abort metadata extraction for the whole feed.
        author_uri_url = None
        if author_uri:
            try:
                author_uri_url = HttpUrl(author_uri)
            except ValidationError:
                author_uri_url = None

        # Parse feed link
        feed_link = None
        if hasattr(feed, 'link'):
            try:
                feed_link = HttpUrl(feed.link)
            except ValidationError:
                pass

        # Parse image/icon/logo
        logo = None
        icon = None
        image_url = None

        if hasattr(feed, 'image'):
            try:
                image_url = HttpUrl(feed.image.get('href', feed.image.get('url', '')))
            except (ValidationError, AttributeError):
                pass

        if hasattr(feed, 'icon'):
            try:
                icon = HttpUrl(feed.icon)
            except ValidationError:
                pass

        if hasattr(feed, 'logo'):
            try:
                logo = HttpUrl(feed.logo)
            except ValidationError:
                pass

        return FeedMetadata(
            title=getattr(feed, 'title', None),
            author_name=author_name,
            author_email=author_email,
            author_uri=author_uri_url,
            link=feed_link,
            logo=logo,
            icon=icon,
            image_url=image_url,
            description=getattr(feed, 'description', None),
        )

    def _normalize_entry(self, entry: feedparser.FeedParserDict, source_url: Optional[HttpUrl] = None) -> AtomEntry:
        """Normalize an entry to Atom format.

        Raises pydantic.ValidationError if the entry has no usable link;
        parse_feed() catches and skips such entries.
        """
        # Parse timestamps
        updated = self._parse_timestamp(entry.get('updated_parsed') or entry.get('published_parsed'))
        published = self._parse_timestamp(entry.get('published_parsed'))

        # Parse content
        content = self._extract_content(entry)
        content_type = self._extract_content_type(entry)

        # Parse author
        author = self._extract_author(entry)

        # Parse categories/tags
        categories = []
        if hasattr(entry, 'tags'):
            categories = [tag.get('term', '') for tag in entry.tags if tag.get('term')]

        # Sanitize HTML content
        if content:
            content = self._sanitize_html(content)

        summary = entry.get('summary', '')
        if summary:
            summary = self._sanitize_html(summary)

        return AtomEntry(
            id=entry.get('id', entry.get('link', '')),
            title=entry.get('title', ''),
            link=HttpUrl(entry.get('link', '')),
            updated=updated,
            published=published,
            summary=summary or None,
            content=content or None,
            content_type=content_type,
            author=author,
            categories=categories,
            rights=entry.get('rights', None),
            source=str(source_url) if source_url else None,
        )

    def _parse_timestamp(self, time_struct) -> datetime:
        """Parse feedparser time struct to datetime.

        NOTE(review): produces naive datetimes and falls back to "now"
        when the feed omits the timestamp — confirm downstream consumers
        never mix these with timezone-aware values.
        """
        if time_struct:
            return datetime(*time_struct[:6])
        return datetime.now()

    def _extract_content(self, entry: feedparser.FeedParserDict) -> Optional[str]:
        """Extract the best content from an entry."""
        # Prefer content over summary
        if hasattr(entry, 'content') and entry.content:
            # Find the best content (prefer text/html, then text/plain)
            for content_item in entry.content:
                if content_item.get('type') in ['text/html', 'html']:
                    return content_item.get('value', '')
                elif content_item.get('type') in ['text/plain', 'text']:
                    return content_item.get('value', '')
            # Fallback to first content item
            return entry.content[0].get('value', '')

        # Fallback to summary
        return entry.get('summary', '')

    def _extract_content_type(self, entry: feedparser.FeedParserDict) -> str:
        """Extract content type from entry, normalized to html/text/xhtml."""
        if hasattr(entry, 'content') and entry.content:
            content_type = entry.content[0].get('type', 'html')
            # Normalize content type
            if content_type in ['text/html', 'html']:
                return 'html'
            elif content_type in ['text/plain', 'text']:
                return 'text'
            elif content_type == 'xhtml':
                return 'xhtml'
        return 'html'

    def _extract_author(self, entry: feedparser.FeedParserDict) -> Optional[dict]:
        """Extract author information from entry.

        Only fields that are actually present are included; a dict of
        None values would break consumers that join the author parts.
        """
        author = {}

        if hasattr(entry, 'author_detail'):
            for key, value in (
                ('name', entry.author_detail.get('name')),
                ('email', entry.author_detail.get('email')),
                ('uri', entry.author_detail.get('href')),
            ):
                if value is not None:
                    author[key] = value
        elif hasattr(entry, 'author'):
            author['name'] = entry.author

        return author if author else None

    def _sanitize_html(self, html: str) -> str:
        """Sanitize HTML content to prevent XSS."""
        return bleach.clean(
            html,
            tags=self.allowed_tags,
            attributes=self.allowed_attributes,
            strip=True,
        )

    def sanitize_entry_id(self, entry_id: str) -> str:
        """Sanitize entry ID to be a safe filename.

        Keeps alphanumerics and '-_.'; everything else becomes '_'.
        Result is non-empty and at most 200 characters.
        """
        # Parse URL to get meaningful parts
        parsed = urlparse(entry_id)

        # Start with the path component
        if parsed.path:
            # Remove leading slash and replace problematic characters
            safe_id = parsed.path.lstrip('/').replace('/', '_').replace('\\', '_')
        else:
            # Use the entire ID as fallback
            safe_id = entry_id

        # Replace problematic characters
        safe_chars = []
        for char in safe_id:
            if char.isalnum() or char in '-_.':
                safe_chars.append(char)
            else:
                safe_chars.append('_')

        safe_id = ''.join(safe_chars)

        # Ensure it's not too long (max 200 chars)
        if len(safe_id) > 200:
            safe_id = safe_id[:200]

        # Ensure it's not empty
        if not safe_id:
            safe_id = "entry"

        return safe_id
target_url=data["target_url"], 3296 target_username=data.get("target_username"), 3297 target_entry_id=data.get("target_entry_id"), 3298 ) 3299 3300 3301class ReferenceIndex: 3302 """Index of blog-to-blog references for creating threaded views.""" 3303 3304 def __init__(self): 3305 self.references: list[BlogReference] = [] 3306 self.outbound_refs: dict[ 3307 str, list[BlogReference] 3308 ] = {} # entry_id -> outbound refs 3309 self.inbound_refs: dict[ 3310 str, list[BlogReference] 3311 ] = {} # entry_id -> inbound refs 3312 self.user_domains: dict[str, set[str]] = {} # username -> set of domains 3313 3314 def add_reference(self, ref: BlogReference) -> None: 3315 """Add a reference to the index.""" 3316 self.references.append(ref) 3317 3318 # Update outbound references 3319 source_key = f"{ref.source_username}:{ref.source_entry_id}" 3320 if source_key not in self.outbound_refs: 3321 self.outbound_refs[source_key] = [] 3322 self.outbound_refs[source_key].append(ref) 3323 3324 # Update inbound references if we can identify the target 3325 if ref.target_username and ref.target_entry_id: 3326 target_key = f"{ref.target_username}:{ref.target_entry_id}" 3327 if target_key not in self.inbound_refs: 3328 self.inbound_refs[target_key] = [] 3329 self.inbound_refs[target_key].append(ref) 3330 3331 def get_outbound_refs(self, username: str, entry_id: str) -> list[BlogReference]: 3332 """Get all outbound references from an entry.""" 3333 key = f"{username}:{entry_id}" 3334 return self.outbound_refs.get(key, []) 3335 3336 def get_inbound_refs(self, username: str, entry_id: str) -> list[BlogReference]: 3337 """Get all inbound references to an entry.""" 3338 key = f"{username}:{entry_id}" 3339 return self.inbound_refs.get(key, []) 3340 3341 def get_thread_members(self, username: str, entry_id: str) -> set[tuple[str, str]]: 3342 """Get all entries that are part of the same thread.""" 3343 visited = set() 3344 to_visit = [(username, entry_id)] 3345 thread_members = set() 3346 3347 
while to_visit: 3348 current_user, current_entry = to_visit.pop() 3349 if (current_user, current_entry) in visited: 3350 continue 3351 3352 visited.add((current_user, current_entry)) 3353 thread_members.add((current_user, current_entry)) 3354 3355 # Add outbound references 3356 for ref in self.get_outbound_refs(current_user, current_entry): 3357 if ref.target_username and ref.target_entry_id: 3358 to_visit.append((ref.target_username, ref.target_entry_id)) 3359 3360 # Add inbound references 3361 for ref in self.get_inbound_refs(current_user, current_entry): 3362 to_visit.append((ref.source_username, ref.source_entry_id)) 3363 3364 return thread_members 3365 3366 def to_dict(self) -> dict: 3367 """Convert to dictionary for JSON serialization.""" 3368 return { 3369 "references": [ref.to_dict() for ref in self.references], 3370 "user_domains": {k: list(v) for k, v in self.user_domains.items()}, 3371 } 3372 3373 @classmethod 3374 def from_dict(cls, data: dict) -> "ReferenceIndex": 3375 """Create from dictionary.""" 3376 index = cls() 3377 for ref_data in data.get("references", []): 3378 ref = BlogReference.from_dict(ref_data) 3379 index.add_reference(ref) 3380 3381 for username, domains in data.get("user_domains", {}).items(): 3382 index.user_domains[username] = set(domains) 3383 3384 return index 3385 3386 3387class ReferenceParser: 3388 """Parses blog entries to detect references to other blogs.""" 3389 3390 def __init__(self): 3391 # Common blog platforms and patterns 3392 self.blog_patterns = [ 3393 r"https?://[^/]+\.(?:org|com|net|io|dev|me|co\.uk)/.*", # Common blog domains 3394 r"https?://[^/]+\.github\.io/.*", # GitHub Pages 3395 r"https?://[^/]+\.substack\.com/.*", # Substack 3396 r"https?://medium\.com/.*", # Medium 3397 r"https?://[^/]+\.wordpress\.com/.*", # WordPress.com 3398 r"https?://[^/]+\.blogspot\.com/.*", # Blogger 3399 ] 3400 3401 # Compile regex patterns 3402 self.link_pattern = re.compile( 3403 r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', 
def extract_links_from_html(self, html_content: str) -> list[tuple[str, str]]:
    """Return (url, plain-text label) pairs for every <a href="..."> anchor in the HTML."""
    results: list[tuple[str, str]] = []
    for anchor in self.link_pattern.finditer(html_content):
        href = anchor.group(1)
        # Strip any nested markup from the anchor's inner text.
        label = re.sub(r"<[^>]+>", "", anchor.group(2)).strip()
        results.append((href, label))
    return results

def is_blog_url(self, url: str) -> bool:
    """Heuristically decide whether *url* looks like a blog post (domain pattern match)."""
    return any(re.match(pattern, url) for pattern in self.blog_patterns)
def resolve_target_user(
    self, url: str, user_domains: dict[str, set[str]]
) -> Optional[str]:
    """Map *url*'s host to a known username via the domain table, or None if unowned."""
    host = urlparse(url).netloc.lower()
    # First user (in dict order) whose domain set contains the host wins.
    matches = (name for name, domains in user_domains.items() if host in domains)
    return next(matches, None)
target_url=url, 3524 target_username=target_username, 3525 target_entry_id=None, # Will be resolved later if possible 3526 ) 3527 3528 references.append(ref) 3529 3530 return references 3531 3532 def build_user_domain_mapping(self, git_store: "GitStore") -> dict[str, set[str]]: 3533 """Build mapping of usernames to their known domains.""" 3534 user_domains = {} 3535 index = git_store._load_index() 3536 3537 for username, user_metadata in index.users.items(): 3538 domains = set() 3539 3540 # Add domains from feeds 3541 for feed_url in user_metadata.feeds: 3542 domain = urlparse(feed_url).netloc.lower() 3543 if domain: 3544 domains.add(domain) 3545 3546 # Add domain from homepage 3547 if user_metadata.homepage: 3548 domain = urlparse(str(user_metadata.homepage)).netloc.lower() 3549 if domain: 3550 domains.add(domain) 3551 3552 user_domains[username] = domains 3553 3554 return user_domains 3555 3556 def _build_url_to_entry_mapping(self, git_store: "GitStore") -> dict[str, str]: 3557 """Build a comprehensive mapping from URLs to entry IDs using git store data. 
3558 3559 This creates a bidirectional mapping that handles: 3560 - Entry link URLs -> Entry IDs 3561 - URL variations (with/without www, http/https) 3562 - Multiple URLs pointing to the same entry 3563 """ 3564 url_to_entry: dict[str, str] = {} 3565 3566 # Load index to get all users 3567 index = git_store._load_index() 3568 3569 for username in index.users.keys(): 3570 entries = git_store.list_entries(username) 3571 3572 for entry in entries: 3573 if entry.link: 3574 link_url = str(entry.link) 3575 entry_id = entry.id 3576 3577 # Map the canonical link URL 3578 url_to_entry[link_url] = entry_id 3579 3580 # Handle common URL variations 3581 parsed = urlparse(link_url) 3582 if parsed.netloc and parsed.path: 3583 # Add version without www 3584 if parsed.netloc.startswith('www.'): 3585 no_www_url = f"{parsed.scheme}://{parsed.netloc[4:]}{parsed.path}" 3586 if parsed.query: 3587 no_www_url += f"?{parsed.query}" 3588 if parsed.fragment: 3589 no_www_url += f"#{parsed.fragment}" 3590 url_to_entry[no_www_url] = entry_id 3591 3592 # Add version with www if not present 3593 elif not parsed.netloc.startswith('www.'): 3594 www_url = f"{parsed.scheme}://www.{parsed.netloc}{parsed.path}" 3595 if parsed.query: 3596 www_url += f"?{parsed.query}" 3597 if parsed.fragment: 3598 www_url += f"#{parsed.fragment}" 3599 url_to_entry[www_url] = entry_id 3600 3601 # Add http/https variations 3602 if parsed.scheme == 'https': 3603 http_url = link_url.replace('https://', 'http://', 1) 3604 url_to_entry[http_url] = entry_id 3605 elif parsed.scheme == 'http': 3606 https_url = link_url.replace('http://', 'https://', 1) 3607 url_to_entry[https_url] = entry_id 3608 3609 return url_to_entry 3610 3611 def _normalize_url(self, url: str) -> str: 3612 """Normalize URL for consistent matching. 3613 3614 Handles common variations like trailing slashes, fragments, etc. 
3615 """ 3616 parsed = urlparse(url) 3617 3618 # Remove trailing slash from path 3619 path = parsed.path.rstrip('/') if parsed.path != '/' else parsed.path 3620 3621 # Reconstruct without fragment for consistent matching 3622 normalized = f"{parsed.scheme}://{parsed.netloc}{path}" 3623 if parsed.query: 3624 normalized += f"?{parsed.query}" 3625 3626 return normalized 3627 3628 def resolve_target_entry_ids( 3629 self, references: list[BlogReference], git_store: "GitStore" 3630 ) -> list[BlogReference]: 3631 """Resolve target_entry_id for references using comprehensive URL mapping.""" 3632 resolved_refs = [] 3633 3634 # Build comprehensive URL to entry ID mapping 3635 url_to_entry = self._build_url_to_entry_mapping(git_store) 3636 3637 for ref in references: 3638 # If we already have a target_entry_id, keep the reference as-is 3639 if ref.target_entry_id is not None: 3640 resolved_refs.append(ref) 3641 continue 3642 3643 # If we don't have a target_username, we can't resolve it 3644 if ref.target_username is None: 3645 resolved_refs.append(ref) 3646 continue 3647 3648 # Try to resolve using URL mapping 3649 resolved_entry_id = None 3650 3651 # First, try exact match 3652 if ref.target_url in url_to_entry: 3653 resolved_entry_id = url_to_entry[ref.target_url] 3654 else: 3655 # Try normalized URL matching 3656 normalized_target = self._normalize_url(ref.target_url) 3657 if normalized_target in url_to_entry: 3658 resolved_entry_id = url_to_entry[normalized_target] 3659 else: 3660 # Try URL variations 3661 for mapped_url, entry_id in url_to_entry.items(): 3662 if self._normalize_url(mapped_url) == normalized_target: 3663 resolved_entry_id = entry_id 3664 break 3665 3666 # Verify the resolved entry belongs to the target username 3667 if resolved_entry_id: 3668 # Double-check by loading the actual entry 3669 entries = git_store.list_entries(ref.target_username) 3670 entry_found = any(entry.id == resolved_entry_id for entry in entries) 3671 if not entry_found: 3672 
resolved_entry_id = None 3673 3674 # Create a new reference with the resolved target_entry_id 3675 resolved_ref = BlogReference( 3676 source_entry_id=ref.source_entry_id, 3677 source_username=ref.source_username, 3678 target_url=ref.target_url, 3679 target_username=ref.target_username, 3680 target_entry_id=resolved_entry_id, 3681 ) 3682 resolved_refs.append(resolved_ref) 3683 3684 return resolved_refs 3685</file> 3686 3687<file path="src/thicket/models/__init__.py"> 3688"""Data models for thicket.""" 3689 3690from .config import ThicketConfig, UserConfig 3691from .feed import AtomEntry, DuplicateMap, FeedMetadata 3692from .user import GitStoreIndex, UserMetadata 3693 3694__all__ = [ 3695 "ThicketConfig", 3696 "UserConfig", 3697 "AtomEntry", 3698 "DuplicateMap", 3699 "FeedMetadata", 3700 "GitStoreIndex", 3701 "UserMetadata", 3702] 3703</file> 3704 3705<file path="src/thicket/models/feed.py"> 3706"""Feed and entry models for thicket.""" 3707 3708from datetime import datetime 3709from typing import TYPE_CHECKING, Optional 3710 3711from pydantic import BaseModel, ConfigDict, EmailStr, HttpUrl 3712 3713if TYPE_CHECKING: 3714 from .config import UserConfig 3715 3716 3717class AtomEntry(BaseModel): 3718 """Represents an Atom feed entry stored in the Git repository.""" 3719 3720 model_config = ConfigDict( 3721 json_encoders={datetime: lambda v: v.isoformat()}, 3722 str_strip_whitespace=True, 3723 ) 3724 3725 id: str # Original Atom ID 3726 title: str 3727 link: HttpUrl 3728 updated: datetime 3729 published: Optional[datetime] = None 3730 summary: Optional[str] = None 3731 content: Optional[str] = None # Full body content from Atom entry 3732 content_type: Optional[str] = "html" # text, html, xhtml 3733 author: Optional[dict] = None 3734 categories: list[str] = [] 3735 rights: Optional[str] = None # Copyright info 3736 source: Optional[str] = None # Source feed URL 3737 3738 3739class FeedMetadata(BaseModel): 3740 """Metadata extracted from a feed for auto-discovery.""" 
3741 3742 title: Optional[str] = None 3743 author_name: Optional[str] = None 3744 author_email: Optional[EmailStr] = None 3745 author_uri: Optional[HttpUrl] = None 3746 link: Optional[HttpUrl] = None 3747 logo: Optional[HttpUrl] = None 3748 icon: Optional[HttpUrl] = None 3749 image_url: Optional[HttpUrl] = None 3750 description: Optional[str] = None 3751 3752 def to_user_config(self, username: str, feed_url: HttpUrl) -> "UserConfig": 3753 """Convert discovered metadata to UserConfig with fallbacks.""" 3754 from .config import UserConfig 3755 3756 return UserConfig( 3757 username=username, 3758 feeds=[feed_url], 3759 display_name=self.author_name or self.title, 3760 email=self.author_email, 3761 homepage=self.author_uri or self.link, 3762 icon=self.logo or self.icon or self.image_url, 3763 ) 3764 3765 3766class DuplicateMap(BaseModel): 3767 """Maps duplicate entry IDs to canonical entry IDs.""" 3768 3769 duplicates: dict[str, str] = {} # duplicate_id -> canonical_id 3770 comment: str = "Entry IDs that map to the same canonical content" 3771 3772 def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None: 3773 """Add a duplicate mapping.""" 3774 self.duplicates[duplicate_id] = canonical_id 3775 3776 def remove_duplicate(self, duplicate_id: str) -> bool: 3777 """Remove a duplicate mapping. 
Returns True if existed.""" 3778 return self.duplicates.pop(duplicate_id, None) is not None 3779 3780 def get_canonical(self, entry_id: str) -> str: 3781 """Get canonical ID for an entry (returns original if not duplicate).""" 3782 return self.duplicates.get(entry_id, entry_id) 3783 3784 def is_duplicate(self, entry_id: str) -> bool: 3785 """Check if entry ID is marked as duplicate.""" 3786 return entry_id in self.duplicates 3787 3788 def get_duplicates_for_canonical(self, canonical_id: str) -> list[str]: 3789 """Get all duplicate IDs that map to a canonical ID.""" 3790 return [ 3791 duplicate_id 3792 for duplicate_id, canonical in self.duplicates.items() 3793 if canonical == canonical_id 3794 ] 3795</file> 3796 3797<file path="src/thicket/models/user.py"> 3798"""User metadata models for thicket.""" 3799 3800from datetime import datetime 3801from typing import Optional 3802 3803from pydantic import BaseModel, ConfigDict 3804 3805 3806class UserMetadata(BaseModel): 3807 """Metadata about a user stored in the Git repository.""" 3808 3809 model_config = ConfigDict( 3810 json_encoders={datetime: lambda v: v.isoformat()}, 3811 str_strip_whitespace=True, 3812 ) 3813 3814 username: str 3815 display_name: Optional[str] = None 3816 email: Optional[str] = None 3817 homepage: Optional[str] = None 3818 icon: Optional[str] = None 3819 feeds: list[str] = [] 3820 directory: str # Directory name in Git store 3821 created: datetime 3822 last_updated: datetime 3823 entry_count: int = 0 3824 3825 def update_timestamp(self) -> None: 3826 """Update the last_updated timestamp to now.""" 3827 self.last_updated = datetime.now() 3828 3829 def increment_entry_count(self, count: int = 1) -> None: 3830 """Increment the entry count by the given amount.""" 3831 self.entry_count += count 3832 self.update_timestamp() 3833 3834 3835class GitStoreIndex(BaseModel): 3836 """Index of all users and their directories in the Git store.""" 3837 3838 model_config = ConfigDict( 3839 json_encoders={datetime: 
lambda v: v.isoformat()} 3840 ) 3841 3842 users: dict[str, UserMetadata] = {} # username -> UserMetadata 3843 created: datetime 3844 last_updated: datetime 3845 total_entries: int = 0 3846 3847 def add_user(self, user_metadata: UserMetadata) -> None: 3848 """Add or update a user in the index.""" 3849 self.users[user_metadata.username] = user_metadata 3850 self.last_updated = datetime.now() 3851 3852 def remove_user(self, username: str) -> bool: 3853 """Remove a user from the index. Returns True if user existed.""" 3854 if username in self.users: 3855 del self.users[username] 3856 self.last_updated = datetime.now() 3857 return True 3858 return False 3859 3860 def get_user(self, username: str) -> Optional[UserMetadata]: 3861 """Get user metadata by username.""" 3862 return self.users.get(username) 3863 3864 def update_entry_count(self, username: str, count: int) -> None: 3865 """Update entry count for a user and total.""" 3866 user = self.get_user(username) 3867 if user: 3868 user.increment_entry_count(count) 3869 self.total_entries += count 3870 self.last_updated = datetime.now() 3871 3872 def recalculate_totals(self) -> None: 3873 """Recalculate total entries from all users.""" 3874 self.total_entries = sum(user.entry_count for user in self.users.values()) 3875 self.last_updated = datetime.now() 3876</file> 3877 3878<file path="src/thicket/utils/__init__.py"> 3879"""Utility modules for thicket.""" 3880 3881# This module will contain shared utilities 3882# For now, it's empty but can be expanded with common functions 3883</file> 3884 3885<file path="src/thicket/__init__.py"> 3886"""Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories.""" 3887 3888__version__ = "0.1.0" 3889__author__ = "thicket" 3890__email__ = "thicket@example.com" 3891</file> 3892 3893<file path="src/thicket/__main__.py"> 3894"""Entry point for running thicket as a module.""" 3895 3896from .cli.main import app 3897 3898if __name__ == "__main__": 3899 app() 3900</file> 3901 
3902<file path=".gitignore"> 3903# Byte-compiled / optimized / DLL files 3904__pycache__/ 3905*.py[codz] 3906*$py.class 3907 3908# C extensions 3909*.so 3910 3911# Distribution / packaging 3912.Python 3913build/ 3914develop-eggs/ 3915dist/ 3916downloads/ 3917eggs/ 3918.eggs/ 3919lib/ 3920lib64/ 3921parts/ 3922sdist/ 3923var/ 3924wheels/ 3925share/python-wheels/ 3926*.egg-info/ 3927.installed.cfg 3928*.egg 3929MANIFEST 3930 3931# PyInstaller 3932# Usually these files are written by a python script from a template 3933# before PyInstaller builds the exe, so as to inject date/other infos into it. 3934*.manifest 3935*.spec 3936 3937# Installer logs 3938pip-log.txt 3939pip-delete-this-directory.txt 3940 3941# Unit test / coverage reports 3942htmlcov/ 3943.tox/ 3944.nox/ 3945.coverage 3946.coverage.* 3947.cache 3948nosetests.xml 3949coverage.xml 3950*.cover 3951*.py.cover 3952.hypothesis/ 3953.pytest_cache/ 3954cover/ 3955 3956# Translations 3957*.mo 3958*.pot 3959 3960# Django stuff: 3961*.log 3962local_settings.py 3963db.sqlite3 3964db.sqlite3-journal 3965 3966# Flask stuff: 3967instance/ 3968.webassets-cache 3969 3970# Scrapy stuff: 3971.scrapy 3972 3973# Sphinx documentation 3974docs/_build/ 3975 3976# PyBuilder 3977.pybuilder/ 3978target/ 3979 3980# Jupyter Notebook 3981.ipynb_checkpoints 3982 3983# IPython 3984profile_default/ 3985ipython_config.py 3986 3987# pyenv 3988# For a library or package, you might want to ignore these files since the code is 3989# intended to run in multiple environments; otherwise, check them in: 3990# .python-version 3991 3992# pipenv 3993# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 3994# However, in case of collaboration, if having platform-specific dependencies or dependencies 3995# having no cross-platform support, pipenv may install dependencies that don't work, or not 3996# install all needed dependencies. 
3997#Pipfile.lock 3998 3999# UV 4000# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 4001# This is especially recommended for binary packages to ensure reproducibility, and is more 4002# commonly ignored for libraries. 4003#uv.lock 4004 4005# poetry 4006# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 4007# This is especially recommended for binary packages to ensure reproducibility, and is more 4008# commonly ignored for libraries. 4009# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 4010#poetry.lock 4011#poetry.toml 4012 4013# pdm 4014# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 4015# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 4016# https://pdm-project.org/en/latest/usage/project/#working-with-version-control 4017#pdm.lock 4018#pdm.toml 4019.pdm-python 4020.pdm-build/ 4021 4022# pixi 4023# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. 4024#pixi.lock 4025# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one 4026# in the .venv directory. It is recommended not to include this directory in version control. 4027.pixi 4028 4029# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 4030__pypackages__/ 4031 4032# Celery stuff 4033celerybeat-schedule 4034celerybeat.pid 4035 4036# SageMath parsed files 4037*.sage.py 4038 4039# Environments 4040.env 4041.envrc 4042.venv 4043env/ 4044venv/ 4045ENV/ 4046env.bak/ 4047venv.bak/ 4048 4049# Spyder project settings 4050.spyderproject 4051.spyproject 4052 4053# Rope project settings 4054.ropeproject 4055 4056# mkdocs documentation 4057/site 4058 4059# mypy 4060.mypy_cache/ 4061.dmypy.json 4062dmypy.json 4063 4064# Pyre type checker 4065.pyre/ 4066 4067# pytype static type analyzer 4068.pytype/ 4069 4070# Cython debug symbols 4071cython_debug/ 4072 4073# PyCharm 4074# JetBrains specific template is maintained in a separate JetBrains.gitignore that can 4075# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 4076# and can be added to the global gitignore or merged into this file. For a more nuclear 4077# option (not recommended) you can uncomment the following to ignore the entire idea folder. 4078#.idea/ 4079 4080# Abstra 4081# Abstra is an AI-powered process automation framework. 4082# Ignore directories containing user credentials, local state, and settings. 4083# Learn more at https://abstra.io/docs 4084.abstra/ 4085 4086# Visual Studio Code 4087# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 4088# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 4089# and can be added to the global gitignore or merged into this file. 
However, if you prefer, 4090# you could uncomment the following to ignore the entire vscode folder 4091# .vscode/ 4092 4093# Ruff stuff: 4094.ruff_cache/ 4095 4096# PyPI configuration file 4097.pypirc 4098 4099# Marimo 4100marimo/_static/ 4101marimo/_lsp/ 4102__marimo__/ 4103 4104# Streamlit 4105.streamlit/secrets.toml 4106 4107thicket.yaml 4108</file> 4109 4110<file path="CLAUDE.md"> 4111My goal is to build a CLI tool called thicket in Python that maintains a Git repository within which Atom feeds can be persisted, including their contents. 4112 4113# Python Environment and Package Management 4114 4115This project uses `uv` for Python package management and virtual environment handling. 4116 4117## Running Commands 4118 4119ALWAYS use `uv run` to execute Python commands: 4120 4121- Run the CLI: `uv run -m thicket` 4122- Run tests: `uv run pytest` 4123- Type checking: `uv run mypy src/` 4124- Linting: `uv run ruff check src/` 4125- Format code: `uv run ruff format src/` 4126- Compile check: `uv run python -m py_compile <file>` 4127 4128## Package Management 4129 4130- Add dependencies: `uv add <package>` 4131- Add dev dependencies: `uv add --dev <package>` 4132- Install dependencies: `uv sync` 4133- Update dependencies: `uv lock --upgrade` 4134 4135# Project Structure 4136 4137The configuration file specifies: 4138- the location of a git store 4139- a list of usernames and target Atom/RSS feed(s) and optional metadata about the username such as their email, homepage, icon and display name 4140- a cache directory to store temporary results such as feed downloads and their last modification date that speed up operations across runs of the tool 4141 4142The Git data store should: 4143- have a subdirectory per user 4144- within that directory, an entry per Atom entry indexed by the Atom id for that entry. The id should be sanitised consistently to be a safe filename. RSS feed should be normalized to Atom before storing it. 
4145- within each entry file, the metadata of the Atom feed converted into a JSON format that preserves as much metadata as possible. 4146- have a JSON file in the Git repository that indexes the users, their associated directories within the Git repository, and any other metadata about that user from the config file 4147The CLI should be modern and use cool progress bars and any otfrom ecosystem libraries. 4148 4149The intention behind the Git repository is that it can be queried by other websites in order to build a webblog structure of comments that link to other blogs. 4150</file> 4151 4152<file path="pyproject.toml"> 4153[build-system] 4154requires = ["hatchling"] 4155build-backend = "hatchling.build" 4156 4157[project] 4158name = "thicket" 4159dynamic = ["version"] 4160description = "A CLI tool for persisting Atom/RSS feeds in Git repositories" 4161readme = "README.md" 4162license = "MIT" 4163requires-python = ">=3.9" 4164authors = [ 4165 {name = "thicket", email = "thicket@example.com"}, 4166] 4167classifiers = [ 4168 "Development Status :: 3 - Alpha", 4169 "Intended Audience :: Developers", 4170 "License :: OSI Approved :: MIT License", 4171 "Operating System :: OS Independent", 4172 "Programming Language :: Python :: 3", 4173 "Programming Language :: Python :: 3.9", 4174 "Programming Language :: Python :: 3.10", 4175 "Programming Language :: Python :: 3.11", 4176 "Programming Language :: Python :: 3.12", 4177 "Programming Language :: Python :: 3.13", 4178 "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary", 4179 "Topic :: Software Development :: Version Control :: Git", 4180 "Topic :: Text Processing :: Markup :: XML", 4181] 4182dependencies = [ 4183 "typer>=0.15.0", 4184 "rich>=13.0.0", 4185 "GitPython>=3.1.40", 4186 "feedparser>=6.0.11", 4187 "pydantic>=2.11.0", 4188 "pydantic-settings>=2.10.0", 4189 "httpx>=0.28.0", 4190 "pendulum>=3.0.0", 4191 "bleach>=6.0.0", 4192 "platformdirs>=4.0.0", 4193 "pyyaml>=6.0.0", 4194 "email_validator", 4195 
"jinja2>=3.1.6", 4196] 4197 4198[project.optional-dependencies] 4199dev = [ 4200 "pytest>=8.0.0", 4201 "pytest-asyncio>=0.24.0", 4202 "pytest-cov>=6.0.0", 4203 "black>=24.0.0", 4204 "ruff>=0.8.0", 4205 "mypy>=1.13.0", 4206 "types-PyYAML>=6.0.0", 4207] 4208 4209[project.urls] 4210Homepage = "https://github.com/example/thicket" 4211Documentation = "https://github.com/example/thicket" 4212Repository = "https://github.com/example/thicket" 4213"Bug Tracker" = "https://github.com/example/thicket/issues" 4214 4215[project.scripts] 4216thicket = "thicket.cli.main:app" 4217 4218[tool.hatch.version] 4219path = "src/thicket/__init__.py" 4220 4221[tool.hatch.build.targets.wheel] 4222packages = ["src/thicket"] 4223 4224[tool.black] 4225line-length = 88 4226target-version = ['py39'] 4227include = '\.pyi?$' 4228extend-exclude = ''' 4229/( 4230 # directories 4231 \.eggs 4232 | \.git 4233 | \.hg 4234 | \.mypy_cache 4235 | \.tox 4236 | \.venv 4237 | build 4238 | dist 4239)/ 4240''' 4241 4242[tool.ruff] 4243target-version = "py39" 4244line-length = 88 4245 4246[tool.ruff.lint] 4247select = [ 4248 "E", # pycodestyle errors 4249 "W", # pycodestyle warnings 4250 "F", # pyflakes 4251 "I", # isort 4252 "B", # flake8-bugbear 4253 "C4", # flake8-comprehensions 4254 "UP", # pyupgrade 4255] 4256ignore = [ 4257 "E501", # line too long, handled by black 4258 "B008", # do not perform function calls in argument defaults 4259 "C901", # too complex 4260] 4261 4262[tool.ruff.lint.per-file-ignores] 4263"__init__.py" = ["F401"] 4264 4265[tool.mypy] 4266python_version = "3.9" 4267check_untyped_defs = true 4268disallow_any_generics = true 4269disallow_incomplete_defs = true 4270disallow_untyped_defs = true 4271no_implicit_optional = true 4272warn_redundant_casts = true 4273warn_unused_ignores = true 4274warn_return_any = true 4275strict_optional = true 4276 4277[[tool.mypy.overrides]] 4278module = [ 4279 "feedparser", 4280 "git", 4281 "bleach", 4282] 4283ignore_missing_imports = true 4284 
4285[tool.pytest.ini_options] 4286testpaths = ["tests"] 4287python_files = ["test_*.py"] 4288python_classes = ["Test*"] 4289python_functions = ["test_*"] 4290addopts = [ 4291 "-ra", 4292 "--strict-markers", 4293 "--strict-config", 4294 "--cov=src/thicket", 4295 "--cov-report=term-missing", 4296 "--cov-report=html", 4297 "--cov-report=xml", 4298] 4299filterwarnings = [ 4300 "error", 4301 "ignore::UserWarning", 4302 "ignore::DeprecationWarning", 4303] 4304markers = [ 4305 "slow: marks tests as slow (deselect with '-m \"not slow\"')", 4306 "integration: marks tests as integration tests", 4307] 4308 4309[tool.coverage.run] 4310source = ["src"] 4311branch = true 4312 4313[tool.coverage.report] 4314exclude_lines = [ 4315 "pragma: no cover", 4316 "def __repr__", 4317 "if self.debug:", 4318 "if settings.DEBUG", 4319 "raise AssertionError", 4320 "raise NotImplementedError", 4321 "if 0:", 4322 "if __name__ == .__main__.:", 4323 "class .*\\bProtocol\\):", 4324 "@(abc\\.)?abstractmethod", 4325] 4326</file> 4327 4328<file path="src/thicket/cli/commands/__init__.py"> 4329"""CLI commands for thicket.""" 4330 4331# Import all commands to register them with the main app 4332from . 
import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync 4333 4334__all__ = ["add", "duplicates", "generate", "index_cmd", "info_cmd", "init", "links_cmd", "list_cmd", "sync"] 4335</file> 4336 4337<file path="src/thicket/cli/commands/add.py"> 4338"""Add command for thicket.""" 4339 4340import asyncio 4341from pathlib import Path 4342from typing import Optional 4343 4344import typer 4345from pydantic import HttpUrl, ValidationError 4346 4347from ...core.feed_parser import FeedParser 4348from ...core.git_store import GitStore 4349from ..main import app 4350from ..utils import ( 4351 create_progress, 4352 load_config, 4353 print_error, 4354 print_info, 4355 print_success, 4356) 4357 4358 4359@app.command("add") 4360def add_command( 4361 subcommand: str = typer.Argument(..., help="Subcommand: 'user' or 'feed'"), 4362 username: str = typer.Argument(..., help="Username"), 4363 feed_url: Optional[str] = typer.Argument(None, help="Feed URL (required for 'user' command)"), 4364 email: Optional[str] = typer.Option(None, "--email", "-e", help="User email"), 4365 homepage: Optional[str] = typer.Option(None, "--homepage", "-h", help="User homepage"), 4366 icon: Optional[str] = typer.Option(None, "--icon", "-i", help="User icon URL"), 4367 display_name: Optional[str] = typer.Option(None, "--display-name", "-d", help="User display name"), 4368 config_file: Optional[Path] = typer.Option( 4369 Path("thicket.yaml"), "--config", help="Configuration file path" 4370 ), 4371 auto_discover: bool = typer.Option( 4372 True, "--auto-discover/--no-auto-discover", help="Auto-discover user metadata from feed" 4373 ), 4374) -> None: 4375 """Add a user or feed to thicket.""" 4376 4377 if subcommand == "user": 4378 add_user(username, feed_url, email, homepage, icon, display_name, config_file, auto_discover) 4379 elif subcommand == "feed": 4380 add_feed(username, feed_url, config_file) 4381 else: 4382 print_error(f"Unknown subcommand: {subcommand}") 4383 
print_error("Use 'user' or 'feed'") 4384 raise typer.Exit(1) 4385 4386 4387def add_user( 4388 username: str, 4389 feed_url: Optional[str], 4390 email: Optional[str], 4391 homepage: Optional[str], 4392 icon: Optional[str], 4393 display_name: Optional[str], 4394 config_file: Path, 4395 auto_discover: bool, 4396) -> None: 4397 """Add a new user with feed.""" 4398 4399 if not feed_url: 4400 print_error("Feed URL is required when adding a user") 4401 raise typer.Exit(1) 4402 4403 # Validate feed URL 4404 try: 4405 validated_feed_url = HttpUrl(feed_url) 4406 except ValidationError: 4407 print_error(f"Invalid feed URL: {feed_url}") 4408 raise typer.Exit(1) from None 4409 4410 # Load configuration 4411 config = load_config(config_file) 4412 4413 # Initialize Git store 4414 git_store = GitStore(config.git_store) 4415 4416 # Check if user already exists 4417 existing_user = git_store.get_user(username) 4418 if existing_user: 4419 print_error(f"User '{username}' already exists") 4420 print_error("Use 'thicket add feed' to add additional feeds") 4421 raise typer.Exit(1) 4422 4423 # Auto-discover metadata if enabled 4424 discovered_metadata = None 4425 if auto_discover: 4426 discovered_metadata = asyncio.run(discover_feed_metadata(validated_feed_url)) 4427 4428 # Prepare user data with manual overrides taking precedence 4429 user_display_name = display_name or (discovered_metadata.author_name or discovered_metadata.title if discovered_metadata else None) 4430 user_email = email or (discovered_metadata.author_email if discovered_metadata else None) 4431 user_homepage = homepage or (str(discovered_metadata.author_uri or discovered_metadata.link) if discovered_metadata else None) 4432 user_icon = icon or (str(discovered_metadata.logo or discovered_metadata.icon or discovered_metadata.image_url) if discovered_metadata else None) 4433 4434 # Add user to Git store 4435 git_store.add_user( 4436 username=username, 4437 display_name=user_display_name, 4438 email=user_email, 4439 
homepage=user_homepage, 4440 icon=user_icon, 4441 feeds=[str(validated_feed_url)], 4442 ) 4443 4444 # Commit changes 4445 git_store.commit_changes(f"Add user: {username}") 4446 4447 print_success(f"Added user '{username}' with feed: {feed_url}") 4448 4449 if discovered_metadata and auto_discover: 4450 print_info("Auto-discovered metadata:") 4451 if user_display_name: 4452 print_info(f" Display name: {user_display_name}") 4453 if user_email: 4454 print_info(f" Email: {user_email}") 4455 if user_homepage: 4456 print_info(f" Homepage: {user_homepage}") 4457 if user_icon: 4458 print_info(f" Icon: {user_icon}") 4459 4460 4461def add_feed(username: str, feed_url: Optional[str], config_file: Path) -> None: 4462 """Add a feed to an existing user.""" 4463 4464 if not feed_url: 4465 print_error("Feed URL is required") 4466 raise typer.Exit(1) 4467 4468 # Validate feed URL 4469 try: 4470 validated_feed_url = HttpUrl(feed_url) 4471 except ValidationError: 4472 print_error(f"Invalid feed URL: {feed_url}") 4473 raise typer.Exit(1) from None 4474 4475 # Load configuration 4476 config = load_config(config_file) 4477 4478 # Initialize Git store 4479 git_store = GitStore(config.git_store) 4480 4481 # Check if user exists 4482 user = git_store.get_user(username) 4483 if not user: 4484 print_error(f"User '{username}' not found") 4485 print_error("Use 'thicket add user' to add a new user") 4486 raise typer.Exit(1) 4487 4488 # Check if feed already exists 4489 if str(validated_feed_url) in user.feeds: 4490 print_error(f"Feed already exists for user '{username}': {feed_url}") 4491 raise typer.Exit(1) 4492 4493 # Add feed to user 4494 updated_feeds = user.feeds + [str(validated_feed_url)] 4495 if git_store.update_user(username, feeds=updated_feeds): 4496 git_store.commit_changes(f"Add feed to user {username}: {feed_url}") 4497 print_success(f"Added feed to user '{username}': {feed_url}") 4498 else: 4499 print_error(f"Failed to add feed to user '{username}'") 4500 raise typer.Exit(1) 4501 
4502 4503async def discover_feed_metadata(feed_url: HttpUrl): 4504 """Discover metadata from a feed URL.""" 4505 try: 4506 with create_progress() as progress: 4507 task = progress.add_task("Discovering feed metadata...", total=None) 4508 4509 parser = FeedParser() 4510 content = await parser.fetch_feed(feed_url) 4511 metadata, _ = parser.parse_feed(content, feed_url) 4512 4513 progress.update(task, completed=True) 4514 return metadata 4515 4516 except Exception as e: 4517 print_error(f"Failed to discover feed metadata: {e}") 4518 return None 4519</file> 4520 4521<file path="src/thicket/cli/commands/duplicates.py"> 4522"""Duplicates command for thicket.""" 4523 4524from pathlib import Path 4525from typing import Optional 4526 4527import typer 4528from rich.table import Table 4529 4530from ...core.git_store import GitStore 4531from ..main import app 4532from ..utils import ( 4533 console, 4534 load_config, 4535 print_error, 4536 print_info, 4537 print_success, 4538 get_tsv_mode, 4539) 4540 4541 4542@app.command("duplicates") 4543def duplicates_command( 4544 action: str = typer.Argument(..., help="Action: 'list', 'add', 'remove'"), 4545 duplicate_id: Optional[str] = typer.Argument(None, help="Duplicate entry ID"), 4546 canonical_id: Optional[str] = typer.Argument(None, help="Canonical entry ID"), 4547 config_file: Optional[Path] = typer.Option( 4548 Path("thicket.yaml"), "--config", help="Configuration file path" 4549 ), 4550) -> None: 4551 """Manage duplicate entry mappings.""" 4552 4553 # Load configuration 4554 config = load_config(config_file) 4555 4556 # Initialize Git store 4557 git_store = GitStore(config.git_store) 4558 4559 if action == "list": 4560 list_duplicates(git_store) 4561 elif action == "add": 4562 add_duplicate(git_store, duplicate_id, canonical_id) 4563 elif action == "remove": 4564 remove_duplicate(git_store, duplicate_id) 4565 else: 4566 print_error(f"Unknown action: {action}") 4567 print_error("Use 'list', 'add', or 'remove'") 4568 raise 
typer.Exit(1) 4569 4570 4571def list_duplicates(git_store: GitStore) -> None: 4572 """List all duplicate mappings.""" 4573 duplicates = git_store.get_duplicates() 4574 4575 if not duplicates.duplicates: 4576 if get_tsv_mode(): 4577 print("No duplicate mappings found") 4578 else: 4579 print_info("No duplicate mappings found") 4580 return 4581 4582 if get_tsv_mode(): 4583 print("Duplicate ID\tCanonical ID") 4584 for duplicate_id, canonical_id in duplicates.duplicates.items(): 4585 print(f"{duplicate_id}\t{canonical_id}") 4586 print(f"Total duplicates: {len(duplicates.duplicates)}") 4587 else: 4588 table = Table(title="Duplicate Entry Mappings") 4589 table.add_column("Duplicate ID", style="red") 4590 table.add_column("Canonical ID", style="green") 4591 4592 for duplicate_id, canonical_id in duplicates.duplicates.items(): 4593 table.add_row(duplicate_id, canonical_id) 4594 4595 console.print(table) 4596 print_info(f"Total duplicates: {len(duplicates.duplicates)}") 4597 4598 4599def add_duplicate(git_store: GitStore, duplicate_id: Optional[str], canonical_id: Optional[str]) -> None: 4600 """Add a duplicate mapping.""" 4601 if not duplicate_id: 4602 print_error("Duplicate ID is required") 4603 raise typer.Exit(1) 4604 4605 if not canonical_id: 4606 print_error("Canonical ID is required") 4607 raise typer.Exit(1) 4608 4609 # Check if duplicate_id already exists 4610 duplicates = git_store.get_duplicates() 4611 if duplicates.is_duplicate(duplicate_id): 4612 existing_canonical = duplicates.get_canonical(duplicate_id) 4613 print_error(f"Duplicate ID already mapped to: {existing_canonical}") 4614 print_error("Use 'remove' first to change the mapping") 4615 raise typer.Exit(1) 4616 4617 # Check if we're trying to make a canonical ID point to itself 4618 if duplicate_id == canonical_id: 4619 print_error("Duplicate ID cannot be the same as canonical ID") 4620 raise typer.Exit(1) 4621 4622 # Add the mapping 4623 git_store.add_duplicate(duplicate_id, canonical_id) 4624 4625 # 
Commit changes 4626 git_store.commit_changes(f"Add duplicate mapping: {duplicate_id} -> {canonical_id}") 4627 4628 print_success(f"Added duplicate mapping: {duplicate_id} -> {canonical_id}") 4629 4630 4631def remove_duplicate(git_store: GitStore, duplicate_id: Optional[str]) -> None: 4632 """Remove a duplicate mapping.""" 4633 if not duplicate_id: 4634 print_error("Duplicate ID is required") 4635 raise typer.Exit(1) 4636 4637 # Check if mapping exists 4638 duplicates = git_store.get_duplicates() 4639 if not duplicates.is_duplicate(duplicate_id): 4640 print_error(f"No duplicate mapping found for: {duplicate_id}") 4641 raise typer.Exit(1) 4642 4643 canonical_id = duplicates.get_canonical(duplicate_id) 4644 4645 # Remove the mapping 4646 if git_store.remove_duplicate(duplicate_id): 4647 # Commit changes 4648 git_store.commit_changes(f"Remove duplicate mapping: {duplicate_id} -> {canonical_id}") 4649 print_success(f"Removed duplicate mapping: {duplicate_id} -> {canonical_id}") 4650 else: 4651 print_error(f"Failed to remove duplicate mapping: {duplicate_id}") 4652 raise typer.Exit(1) 4653</file> 4654 4655<file path="src/thicket/cli/commands/sync.py"> 4656"""Sync command for thicket.""" 4657 4658import asyncio 4659from pathlib import Path 4660from typing import Optional 4661 4662import typer 4663from rich.progress import track 4664 4665from ...core.feed_parser import FeedParser 4666from ...core.git_store import GitStore 4667from ..main import app 4668from ..utils import ( 4669 load_config, 4670 print_error, 4671 print_info, 4672 print_success, 4673) 4674 4675 4676@app.command() 4677def sync( 4678 all_users: bool = typer.Option( 4679 False, "--all", "-a", help="Sync all users and feeds" 4680 ), 4681 user: Optional[str] = typer.Option( 4682 None, "--user", "-u", help="Sync specific user only" 4683 ), 4684 config_file: Optional[Path] = typer.Option( 4685 Path("thicket.yaml"), "--config", help="Configuration file path" 4686 ), 4687 dry_run: bool = typer.Option( 4688 False, 
"--dry-run", help="Show what would be synced without making changes" 4689 ), 4690) -> None: 4691 """Sync feeds and store entries in Git repository.""" 4692 4693 # Load configuration 4694 config = load_config(config_file) 4695 4696 # Initialize Git store 4697 git_store = GitStore(config.git_store) 4698 4699 # Determine which users to sync from git repository 4700 users_to_sync = [] 4701 if all_users: 4702 index = git_store._load_index() 4703 users_to_sync = list(index.users.values()) 4704 elif user: 4705 user_metadata = git_store.get_user(user) 4706 if not user_metadata: 4707 print_error(f"User '{user}' not found in git repository") 4708 raise typer.Exit(1) 4709 users_to_sync = [user_metadata] 4710 else: 4711 print_error("Specify --all to sync all users or --user to sync a specific user") 4712 raise typer.Exit(1) 4713 4714 if not users_to_sync: 4715 print_info("No users configured to sync") 4716 return 4717 4718 # Sync each user 4719 total_new_entries = 0 4720 total_updated_entries = 0 4721 4722 for user_metadata in users_to_sync: 4723 print_info(f"Syncing user: {user_metadata.username}") 4724 4725 user_new_entries = 0 4726 user_updated_entries = 0 4727 4728 # Sync each feed for the user 4729 for feed_url in track(user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"): 4730 try: 4731 new_entries, updated_entries = asyncio.run( 4732 sync_feed(git_store, user_metadata.username, feed_url, dry_run) 4733 ) 4734 user_new_entries += new_entries 4735 user_updated_entries += updated_entries 4736 4737 except Exception as e: 4738 print_error(f"Failed to sync feed {feed_url}: {e}") 4739 continue 4740 4741 print_info(f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated") 4742 total_new_entries += user_new_entries 4743 total_updated_entries += user_updated_entries 4744 4745 # Commit changes if not dry run 4746 if not dry_run and (total_new_entries > 0 or total_updated_entries > 0): 4747 commit_message = f"Sync feeds: 
{total_new_entries} new entries, {total_updated_entries} updated" 4748 git_store.commit_changes(commit_message) 4749 print_success(f"Committed changes: {commit_message}") 4750 4751 # Summary 4752 if dry_run: 4753 print_info(f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated") 4754 else: 4755 print_success(f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated") 4756 4757 4758async def sync_feed(git_store: GitStore, username: str, feed_url, dry_run: bool) -> tuple[int, int]: 4759 """Sync a single feed for a user.""" 4760 4761 parser = FeedParser() 4762 4763 try: 4764 # Fetch and parse feed 4765 content = await parser.fetch_feed(feed_url) 4766 metadata, entries = parser.parse_feed(content, feed_url) 4767 4768 new_entries = 0 4769 updated_entries = 0 4770 4771 # Process each entry 4772 for entry in entries: 4773 try: 4774 # Check if entry already exists 4775 existing_entry = git_store.get_entry(username, entry.id) 4776 4777 if existing_entry: 4778 # Check if entry has been updated 4779 if existing_entry.updated != entry.updated: 4780 if not dry_run: 4781 git_store.store_entry(username, entry) 4782 updated_entries += 1 4783 else: 4784 # New entry 4785 if not dry_run: 4786 git_store.store_entry(username, entry) 4787 new_entries += 1 4788 4789 except Exception as e: 4790 print_error(f"Failed to process entry {entry.id}: {e}") 4791 continue 4792 4793 return new_entries, updated_entries 4794 4795 except Exception as e: 4796 print_error(f"Failed to sync feed {feed_url}: {e}") 4797 return 0, 0 4798</file> 4799 4800<file path="src/thicket/models/config.py"> 4801"""Configuration models for thicket.""" 4802 4803from pathlib import Path 4804from typing import Optional 4805 4806from pydantic import BaseModel, EmailStr, HttpUrl 4807from pydantic_settings import BaseSettings, SettingsConfigDict 4808 4809 4810class UserConfig(BaseModel): 4811 """Configuration for a single user and their feeds.""" 4812 4813 
    username: str
    # One or more Atom/RSS feed URLs associated with this user.
    feeds: list[HttpUrl]
    email: Optional[EmailStr] = None
    homepage: Optional[HttpUrl] = None
    icon: Optional[HttpUrl] = None
    display_name: Optional[str] = None


class ThicketConfig(BaseSettings):
    """Main configuration for thicket.

    Per model_config below, values can be supplied via environment
    variables (prefix ``THICKET_``) or a ``.env`` file.
    """

    model_config = SettingsConfigDict(
        env_prefix="THICKET_",
        env_file=".env",
        # NOTE(review): pydantic-settings does not consume yaml_file unless a
        # YamlConfigSettingsSource is wired in via settings_customise_sources;
        # confirm the CLI's load_config performs the YAML parsing itself.
        yaml_file="thicket.yaml",
        case_sensitive=False,
    )

    # Path to the Git repository used as the entry store.
    git_store: Path
    cache_dir: Path
    users: list[UserConfig] = []
</file>

<file path="src/thicket/cli/commands/links_cmd.py">
"""CLI command for extracting and categorizing all outbound links from blog entries."""

import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Set
from urllib.parse import urljoin, urlparse

import typer
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
from rich.table import Table

from ...core.git_store import GitStore
from ..main import app
from ..utils import load_config, get_tsv_mode

console = Console()


class LinkData:
    """Represents a single outbound link found in a blog entry.

    Attributes:
        url: URL of the link (resolved against the feed's base URL upstream).
        entry_id: Atom ID of the entry the link was found in.
        username: Owner of the entry the link was found in.
    """

    def __init__(self, url: str, entry_id: str, username: str):
        self.url = url
        self.entry_id = entry_id
        self.username = username

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "url": self.url,
            "entry_id": self.entry_id,
            "username": self.username
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LinkData":
        """Create a LinkData from a dictionary produced by to_dict()."""
        return cls(
            url=data["url"],
            entry_id=data["entry_id"],
            username=data["username"]
        )


class LinkCategorizer:
    """Categorizes links as internal, user, or unknown."""

    def __init__(self, user_domains: Dict[str, 
Set[str]]): 4887 self.user_domains = user_domains 4888 # Create reverse mapping of domain -> username 4889 self.domain_to_user = {} 4890 for username, domains in user_domains.items(): 4891 for domain in domains: 4892 self.domain_to_user[domain] = username 4893 4894 def categorize_url(self, url: str, source_username: str) -> tuple[str, Optional[str]]: 4895 """ 4896 Categorize a URL as 'internal', 'user', or 'unknown'. 4897 Returns (category, target_username). 4898 """ 4899 try: 4900 parsed = urlparse(url) 4901 domain = parsed.netloc.lower() 4902 4903 # Check if it's a link to the same user's domain (internal) 4904 if domain in self.user_domains.get(source_username, set()): 4905 return "internal", source_username 4906 4907 # Check if it's a link to another user's domain 4908 if domain in self.domain_to_user: 4909 return "user", self.domain_to_user[domain] 4910 4911 # Everything else is unknown 4912 return "unknown", None 4913 4914 except Exception: 4915 return "unknown", None 4916 4917 4918class LinkExtractor: 4919 """Extracts and resolves links from blog entries.""" 4920 4921 def __init__(self): 4922 # Pattern for extracting links from HTML 4923 self.link_pattern = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL) 4924 self.url_pattern = re.compile(r'https?://[^\s<>"]+') 4925 4926 def extract_links_from_html(self, html_content: str, base_url: str) -> List[tuple[str, str]]: 4927 """Extract all links from HTML content and resolve them against base URL.""" 4928 links = [] 4929 4930 # Extract links from <a> tags 4931 for match in self.link_pattern.finditer(html_content): 4932 url = match.group(1) 4933 text = re.sub(r'<[^>]+>', '', match.group(2)).strip() # Remove HTML tags from link text 4934 4935 # Resolve relative URLs against base URL 4936 resolved_url = urljoin(base_url, url) 4937 links.append((resolved_url, text)) 4938 4939 return links 4940 4941 4942 def extract_links_from_entry(self, entry, username: str, base_url: str) -> 
List[LinkData]: 4943 """Extract all links from a blog entry.""" 4944 links = [] 4945 4946 # Combine all text content for analysis 4947 content_to_search = [] 4948 if entry.content: 4949 content_to_search.append(entry.content) 4950 if entry.summary: 4951 content_to_search.append(entry.summary) 4952 4953 for content in content_to_search: 4954 extracted_links = self.extract_links_from_html(content, base_url) 4955 4956 for url, link_text in extracted_links: 4957 # Skip empty URLs 4958 if not url or url.startswith('#'): 4959 continue 4960 4961 link_data = LinkData( 4962 url=url, 4963 entry_id=entry.id, 4964 username=username 4965 ) 4966 4967 links.append(link_data) 4968 4969 return links 4970 4971 4972@app.command() 4973def links( 4974 config_file: Optional[Path] = typer.Option( 4975 Path("thicket.yaml"), 4976 "--config", 4977 "-c", 4978 help="Path to configuration file", 4979 ), 4980 output_file: Optional[Path] = typer.Option( 4981 None, 4982 "--output", 4983 "-o", 4984 help="Path to output unified links file (default: links.json in git store)", 4985 ), 4986 verbose: bool = typer.Option( 4987 False, 4988 "--verbose", 4989 "-v", 4990 help="Show detailed progress information", 4991 ), 4992) -> None: 4993 """Extract and categorize all outbound links from blog entries. 4994 4995 This command analyzes all blog entries to extract outbound links, 4996 resolve them properly with respect to the feed's base URL, and 4997 categorize them as internal, user, or unknown links. 4998 4999 Creates a unified links.json file containing all link data. 
5000 """ 5001 try: 5002 # Load configuration 5003 config = load_config(config_file) 5004 5005 # Initialize Git store 5006 git_store = GitStore(config.git_store) 5007 5008 # Build user domain mapping 5009 if verbose: 5010 console.print("Building user domain mapping...") 5011 5012 index = git_store._load_index() 5013 user_domains = {} 5014 5015 for username, user_metadata in index.users.items(): 5016 domains = set() 5017 5018 # Add domains from feeds 5019 for feed_url in user_metadata.feeds: 5020 domain = urlparse(feed_url).netloc.lower() 5021 if domain: 5022 domains.add(domain) 5023 5024 # Add domain from homepage 5025 if user_metadata.homepage: 5026 domain = urlparse(str(user_metadata.homepage)).netloc.lower() 5027 if domain: 5028 domains.add(domain) 5029 5030 user_domains[username] = domains 5031 5032 if verbose: 5033 console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains") 5034 5035 # Initialize components 5036 link_extractor = LinkExtractor() 5037 categorizer = LinkCategorizer(user_domains) 5038 5039 # Get all users 5040 users = list(index.users.keys()) 5041 5042 if not users: 5043 console.print("[yellow]No users found in Git store[/yellow]") 5044 raise typer.Exit(0) 5045 5046 # Process all entries 5047 all_links = [] 5048 link_categories = {"internal": [], "user": [], "unknown": []} 5049 link_dict = {} # Dictionary with link URL as key, maps to list of atom IDs 5050 reverse_dict = {} # Dictionary with atom ID as key, maps to list of URLs 5051 5052 with Progress( 5053 SpinnerColumn(), 5054 TextColumn("[progress.description]{task.description}"), 5055 BarColumn(), 5056 TaskProgressColumn(), 5057 console=console, 5058 ) as progress: 5059 5060 # Count total entries first 5061 counting_task = progress.add_task("Counting entries...", total=len(users)) 5062 total_entries = 0 5063 5064 for username in users: 5065 entries = git_store.list_entries(username) 5066 total_entries += len(entries) 5067 
progress.advance(counting_task) 5068 5069 progress.remove_task(counting_task) 5070 5071 # Process entries 5072 processing_task = progress.add_task( 5073 f"Processing {total_entries} entries...", 5074 total=total_entries 5075 ) 5076 5077 for username in users: 5078 entries = git_store.list_entries(username) 5079 user_metadata = index.users[username] 5080 5081 # Get base URL for this user (use first feed URL) 5082 base_url = str(user_metadata.feeds[0]) if user_metadata.feeds else "https://example.com" 5083 5084 for entry in entries: 5085 # Extract links from this entry 5086 entry_links = link_extractor.extract_links_from_entry(entry, username, base_url) 5087 5088 # Track unique links per entry 5089 entry_urls_seen = set() 5090 5091 # Categorize each link 5092 for link_data in entry_links: 5093 # Skip if we've already seen this URL in this entry 5094 if link_data.url in entry_urls_seen: 5095 continue 5096 entry_urls_seen.add(link_data.url) 5097 5098 category, target_username = categorizer.categorize_url(link_data.url, username) 5099 5100 # Add to link dictionary (URL as key, maps to list of atom IDs) 5101 if link_data.url not in link_dict: 5102 link_dict[link_data.url] = [] 5103 if link_data.entry_id not in link_dict[link_data.url]: 5104 link_dict[link_data.url].append(link_data.entry_id) 5105 5106 # Also add to reverse mapping (atom ID -> list of URLs) 5107 if link_data.entry_id not in reverse_dict: 5108 reverse_dict[link_data.entry_id] = [] 5109 if link_data.url not in reverse_dict[link_data.entry_id]: 5110 reverse_dict[link_data.entry_id].append(link_data.url) 5111 5112 # Add category info to link data for categories tracking 5113 link_info = link_data.to_dict() 5114 link_info["category"] = category 5115 link_info["target_username"] = target_username 5116 5117 all_links.append(link_info) 5118 link_categories[category].append(link_info) 5119 5120 progress.advance(processing_task) 5121 5122 if verbose and entry_links: 5123 console.print(f" Found {len(entry_links)} 
links in {username}:{entry.title[:50]}...") 5124 5125 # Determine output path 5126 if output_file: 5127 output_path = output_file 5128 else: 5129 output_path = config.git_store / "links.json" 5130 5131 # Save all extracted links (not just filtered ones) 5132 if verbose: 5133 console.print("Preparing output data...") 5134 5135 # Build a set of all URLs that correspond to posts in the git database 5136 registered_urls = set() 5137 5138 # Get all entries from all users and build URL mappings 5139 for username in users: 5140 entries = git_store.list_entries(username) 5141 user_metadata = index.users[username] 5142 5143 for entry in entries: 5144 # Try to match entry URLs with extracted links 5145 if hasattr(entry, 'link') and entry.link: 5146 registered_urls.add(str(entry.link)) 5147 5148 # Also check entry alternate links if they exist 5149 if hasattr(entry, 'links') and entry.links: 5150 for link in entry.links: 5151 if hasattr(link, 'href') and link.href: 5152 registered_urls.add(str(link.href)) 5153 5154 # Build unified structure with metadata 5155 unified_links = {} 5156 reverse_mapping = {} 5157 5158 for url, entry_ids in link_dict.items(): 5159 unified_links[url] = { 5160 "referencing_entries": entry_ids 5161 } 5162 5163 # Find target username if this is a tracked post 5164 if url in registered_urls: 5165 for username in users: 5166 user_domains_set = {domain for domain in user_domains.get(username, [])} 5167 if any(domain in url for domain in user_domains_set): 5168 unified_links[url]["target_username"] = username 5169 break 5170 5171 # Build reverse mapping 5172 for entry_id in entry_ids: 5173 if entry_id not in reverse_mapping: 5174 reverse_mapping[entry_id] = [] 5175 if url not in reverse_mapping[entry_id]: 5176 reverse_mapping[entry_id].append(url) 5177 5178 # Create unified output data 5179 output_data = { 5180 "links": unified_links, 5181 "reverse_mapping": reverse_mapping, 5182 "user_domains": {k: list(v) for k, v in user_domains.items()} 5183 } 5184 
5185 if verbose: 5186 console.print(f"Found {len(registered_urls)} registered post URLs") 5187 console.print(f"Found {len(link_dict)} total links, {sum(1 for link in unified_links.values() if 'target_username' in link)} tracked posts") 5188 5189 # Save unified data 5190 with open(output_path, "w") as f: 5191 json.dump(output_data, f, indent=2, default=str) 5192 5193 # Show summary 5194 if not get_tsv_mode(): 5195 console.print("\n[green]✓ Links extraction completed successfully[/green]") 5196 5197 # Create summary table or TSV output 5198 if get_tsv_mode(): 5199 print("Category\tCount\tDescription") 5200 print(f"Internal\t{len(link_categories['internal'])}\tLinks to same user's domain") 5201 print(f"User\t{len(link_categories['user'])}\tLinks to other tracked users") 5202 print(f"Unknown\t{len(link_categories['unknown'])}\tLinks to external sites") 5203 print(f"Total Extracted\t{len(all_links)}\tAll extracted links") 5204 print(f"Saved to Output\t{len(output_data['links'])}\tLinks saved to output file") 5205 print(f"Cross-references\t{sum(1 for link in unified_links.values() if 'target_username' in link)}\tLinks to registered posts only") 5206 else: 5207 table = Table(title="Links Summary") 5208 table.add_column("Category", style="cyan") 5209 table.add_column("Count", style="green") 5210 table.add_column("Description", style="white") 5211 5212 table.add_row("Internal", str(len(link_categories["internal"])), "Links to same user's domain") 5213 table.add_row("User", str(len(link_categories["user"])), "Links to other tracked users") 5214 table.add_row("Unknown", str(len(link_categories["unknown"])), "Links to external sites") 5215 table.add_row("Total Extracted", str(len(all_links)), "All extracted links") 5216 table.add_row("Saved to Output", str(len(output_data['links'])), "Links saved to output file") 5217 table.add_row("Cross-references", str(sum(1 for link in unified_links.values() if 'target_username' in link)), "Links to registered posts only") 5218 5219 
console.print(table) 5220 5221 # Show user links if verbose 5222 if verbose and link_categories["user"]: 5223 if get_tsv_mode(): 5224 print("User Link Source\tUser Link Target\tLink Count") 5225 user_link_counts = {} 5226 5227 for link in link_categories["user"]: 5228 key = f"{link['username']} -> {link['target_username']}" 5229 user_link_counts[key] = user_link_counts.get(key, 0) + 1 5230 5231 for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]: 5232 source, target = link_pair.split(" -> ") 5233 print(f"{source}\t{target}\t{count}") 5234 else: 5235 console.print("\n[bold]User-to-user links:[/bold]") 5236 user_link_counts = {} 5237 5238 for link in link_categories["user"]: 5239 key = f"{link['username']} -> {link['target_username']}" 5240 user_link_counts[key] = user_link_counts.get(key, 0) + 1 5241 5242 for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]: 5243 console.print(f" {link_pair}: {count} links") 5244 5245 if not get_tsv_mode(): 5246 console.print(f"\nUnified links data saved to: {output_path}") 5247 5248 except Exception as e: 5249 console.print(f"[red]Error extracting links: {e}[/red]") 5250 if verbose: 5251 console.print_exception() 5252 raise typer.Exit(1) 5253</file> 5254 5255<file path="src/thicket/cli/commands/list_cmd.py"> 5256"""List command for thicket.""" 5257 5258import re 5259from pathlib import Path 5260from typing import Optional 5261 5262import typer 5263from rich.table import Table 5264 5265from ...core.git_store import GitStore 5266from ..main import app 5267from ..utils import ( 5268 console, 5269 load_config, 5270 print_error, 5271 print_feeds_table, 5272 print_feeds_table_from_git, 5273 print_info, 5274 print_users_table, 5275 print_users_table_from_git, 5276 print_entries_tsv, 5277 get_tsv_mode, 5278) 5279 5280 5281@app.command("list") 5282def list_command( 5283 what: str = typer.Argument(..., help="What to list: 'users', 'feeds', 'entries'"), 
5284 user: Optional[str] = typer.Option( 5285 None, "--user", "-u", help="Filter by specific user" 5286 ), 5287 limit: Optional[int] = typer.Option( 5288 None, "--limit", "-l", help="Limit number of results" 5289 ), 5290 config_file: Optional[Path] = typer.Option( 5291 Path("thicket.yaml"), "--config", help="Configuration file path" 5292 ), 5293) -> None: 5294 """List users, feeds, or entries.""" 5295 5296 # Load configuration 5297 config = load_config(config_file) 5298 5299 # Initialize Git store 5300 git_store = GitStore(config.git_store) 5301 5302 if what == "users": 5303 list_users(git_store) 5304 elif what == "feeds": 5305 list_feeds(git_store, user) 5306 elif what == "entries": 5307 list_entries(git_store, user, limit) 5308 else: 5309 print_error(f"Unknown list type: {what}") 5310 print_error("Use 'users', 'feeds', or 'entries'") 5311 raise typer.Exit(1) 5312 5313 5314def list_users(git_store: GitStore) -> None: 5315 """List all users.""" 5316 index = git_store._load_index() 5317 users = list(index.users.values()) 5318 5319 if not users: 5320 print_info("No users configured") 5321 return 5322 5323 print_users_table_from_git(users) 5324 5325 5326def list_feeds(git_store: GitStore, username: Optional[str] = None) -> None: 5327 """List feeds, optionally filtered by user.""" 5328 if username: 5329 user = git_store.get_user(username) 5330 if not user: 5331 print_error(f"User '{username}' not found") 5332 raise typer.Exit(1) 5333 5334 if not user.feeds: 5335 print_info(f"No feeds configured for user '{username}'") 5336 return 5337 5338 print_feeds_table_from_git(git_store, username) 5339 5340 5341def list_entries(git_store: GitStore, username: Optional[str] = None, limit: Optional[int] = None) -> None: 5342 """List entries, optionally filtered by user.""" 5343 5344 if username: 5345 # List entries for specific user 5346 user = git_store.get_user(username) 5347 if not user: 5348 print_error(f"User '{username}' not found") 5349 raise typer.Exit(1) 5350 5351 entries = 
git_store.list_entries(username, limit) 5352 if not entries: 5353 print_info(f"No entries found for user '{username}'") 5354 return 5355 5356 print_entries_table([entries], [username]) 5357 5358 else: 5359 # List entries for all users 5360 all_entries = [] 5361 all_usernames = [] 5362 5363 index = git_store._load_index() 5364 for user in index.users.values(): 5365 entries = git_store.list_entries(user.username, limit) 5366 if entries: 5367 all_entries.append(entries) 5368 all_usernames.append(user.username) 5369 5370 if not all_entries: 5371 print_info("No entries found") 5372 return 5373 5374 print_entries_table(all_entries, all_usernames) 5375 5376 5377def _clean_html_content(content: Optional[str]) -> str: 5378 """Clean HTML content for display in table.""" 5379 if not content: 5380 return "" 5381 5382 # Remove HTML tags 5383 clean_text = re.sub(r'<[^>]+>', ' ', content) 5384 # Replace multiple whitespace with single space 5385 clean_text = re.sub(r'\s+', ' ', clean_text) 5386 # Strip and limit length 5387 clean_text = clean_text.strip() 5388 if len(clean_text) > 100: 5389 clean_text = clean_text[:97] + "..." 
5390 5391 return clean_text 5392 5393 5394def print_entries_table(entries_by_user: list[list], usernames: list[str]) -> None: 5395 """Print a table of entries.""" 5396 if get_tsv_mode(): 5397 print_entries_tsv(entries_by_user, usernames) 5398 return 5399 5400 table = Table(title="Feed Entries") 5401 table.add_column("User", style="cyan", no_wrap=True) 5402 table.add_column("Title", style="bold") 5403 table.add_column("Updated", style="blue") 5404 table.add_column("URL", style="green") 5405 5406 # Combine all entries with usernames 5407 all_entries = [] 5408 for entries, username in zip(entries_by_user, usernames): 5409 for entry in entries: 5410 all_entries.append((username, entry)) 5411 5412 # Sort by updated time (newest first) 5413 all_entries.sort(key=lambda x: x[1].updated, reverse=True) 5414 5415 for username, entry in all_entries: 5416 # Format updated time 5417 updated_str = entry.updated.strftime("%Y-%m-%d %H:%M") 5418 5419 # Truncate title if too long 5420 title = entry.title 5421 if len(title) > 50: 5422 title = title[:47] + "..." 5423 5424 table.add_row( 5425 username, 5426 title, 5427 updated_str, 5428 str(entry.link), 5429 ) 5430 5431 console.print(table) 5432</file> 5433 5434<file path="src/thicket/cli/main.py"> 5435"""Main CLI application using Typer.""" 5436 5437import typer 5438from rich.console import Console 5439 5440from .. 
from .. import __version__

# Top-level Typer application. Individual command modules register their
# commands against this object when imported (see bottom of this file).
app = typer.Typer(
    name="thicket",
    help="A CLI tool for persisting Atom/RSS feeds in Git repositories",
    no_args_is_help=True,
    rich_markup_mode="rich",
)

console = Console()

# Global state for TSV output mode. Written once per invocation by the
# `main` callback below; read by cli.utils.get_tsv_mode().
tsv_mode = False


def version_callback(value: bool) -> None:
    """Show version and exit.

    Typer option callback: invoked eagerly when --version/-v is passed;
    prints the package version and terminates via typer.Exit.
    """
    if value:
        console.print(f"thicket version {__version__}")
        raise typer.Exit()


@app.callback()
def main(
    version: bool = typer.Option(
        None,
        "--version",
        "-v",
        help="Show the version and exit",
        callback=version_callback,
        # is_eager makes the version check run before any command logic.
        is_eager=True,
    ),
    tsv: bool = typer.Option(
        False,
        "--tsv",
        help="Output in tab-separated values format without truncation",
    ),
) -> None:
    """Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories."""
    # Stash the flag in a module-level global so the table-printing helpers
    # can switch to TSV output without threading the flag through every call.
    global tsv_mode
    tsv_mode = tsv


# Import commands to register them
# NOTE: deliberately placed after `app` is defined — the command modules
# import `app` from here, so a top-of-file import would be circular.
from .commands import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync

if __name__ == "__main__":
    app()
exist_ok=True) 5517 5518 try: 5519 self.repo = Repo(self.repo_path) 5520 except git.InvalidGitRepositoryError: 5521 # Initialize new repository 5522 self.repo = Repo.init(self.repo_path) 5523 self._create_initial_structure() 5524 5525 def _create_initial_structure(self) -> None: 5526 """Create initial Git store structure.""" 5527 # Create index.json 5528 index = GitStoreIndex( 5529 created=datetime.now(), 5530 last_updated=datetime.now(), 5531 ) 5532 self._save_index(index) 5533 5534 # Create duplicates.json 5535 duplicates = DuplicateMap() 5536 self._save_duplicates(duplicates) 5537 5538 # Create initial commit 5539 self.repo.index.add(["index.json", "duplicates.json"]) 5540 self.repo.index.commit("Initial thicket repository structure") 5541 5542 def _save_index(self, index: GitStoreIndex) -> None: 5543 """Save the index to index.json.""" 5544 index_path = self.repo_path / "index.json" 5545 with open(index_path, "w") as f: 5546 json.dump(index.model_dump(mode="json", exclude_none=True), f, indent=2, default=str) 5547 5548 def _load_index(self) -> GitStoreIndex: 5549 """Load the index from index.json.""" 5550 index_path = self.repo_path / "index.json" 5551 if not index_path.exists(): 5552 return GitStoreIndex( 5553 created=datetime.now(), 5554 last_updated=datetime.now(), 5555 ) 5556 5557 with open(index_path) as f: 5558 data = json.load(f) 5559 5560 return GitStoreIndex(**data) 5561 5562 def _save_duplicates(self, duplicates: DuplicateMap) -> None: 5563 """Save duplicates map to duplicates.json.""" 5564 duplicates_path = self.repo_path / "duplicates.json" 5565 with open(duplicates_path, "w") as f: 5566 json.dump(duplicates.model_dump(exclude_none=True), f, indent=2) 5567 5568 def _load_duplicates(self) -> DuplicateMap: 5569 """Load duplicates map from duplicates.json.""" 5570 duplicates_path = self.repo_path / "duplicates.json" 5571 if not duplicates_path.exists(): 5572 return DuplicateMap() 5573 5574 with open(duplicates_path) as f: 5575 data = json.load(f) 5576 
5577 return DuplicateMap(**data) 5578 5579 def add_user(self, username: str, display_name: Optional[str] = None, 5580 email: Optional[str] = None, homepage: Optional[str] = None, 5581 icon: Optional[str] = None, feeds: Optional[list[str]] = None) -> UserMetadata: 5582 """Add a new user to the Git store.""" 5583 index = self._load_index() 5584 5585 # Create user directory 5586 user_dir = self.repo_path / username 5587 user_dir.mkdir(exist_ok=True) 5588 5589 # Create user metadata 5590 user_metadata = UserMetadata( 5591 username=username, 5592 display_name=display_name, 5593 email=email, 5594 homepage=homepage, 5595 icon=icon, 5596 feeds=feeds or [], 5597 directory=username, 5598 created=datetime.now(), 5599 last_updated=datetime.now(), 5600 ) 5601 5602 5603 # Update index 5604 index.add_user(user_metadata) 5605 self._save_index(index) 5606 5607 return user_metadata 5608 5609 def get_user(self, username: str) -> Optional[UserMetadata]: 5610 """Get user metadata by username.""" 5611 index = self._load_index() 5612 return index.get_user(username) 5613 5614 def update_user(self, username: str, **kwargs) -> bool: 5615 """Update user metadata.""" 5616 index = self._load_index() 5617 user = index.get_user(username) 5618 5619 if not user: 5620 return False 5621 5622 # Update user metadata 5623 for key, value in kwargs.items(): 5624 if hasattr(user, key) and value is not None: 5625 setattr(user, key, value) 5626 5627 user.update_timestamp() 5628 5629 5630 # Update index 5631 index.add_user(user) 5632 self._save_index(index) 5633 5634 return True 5635 5636 def store_entry(self, username: str, entry: AtomEntry) -> bool: 5637 """Store an entry in the user's directory.""" 5638 user = self.get_user(username) 5639 if not user: 5640 return False 5641 5642 # Sanitize entry ID for filename 5643 from .feed_parser import FeedParser 5644 parser = FeedParser() 5645 safe_id = parser.sanitize_entry_id(entry.id) 5646 5647 # Create entry file 5648 user_dir = self.repo_path / user.directory 
5649 entry_path = user_dir / f"{safe_id}.json" 5650 5651 # Check if entry already exists 5652 entry_exists = entry_path.exists() 5653 5654 # Save entry 5655 with open(entry_path, "w") as f: 5656 json.dump(entry.model_dump(mode="json", exclude_none=True), f, indent=2, default=str) 5657 5658 # Update user metadata if new entry 5659 if not entry_exists: 5660 index = self._load_index() 5661 index.update_entry_count(username, 1) 5662 self._save_index(index) 5663 5664 return True 5665 5666 def get_entry(self, username: str, entry_id: str) -> Optional[AtomEntry]: 5667 """Get an entry by username and entry ID.""" 5668 user = self.get_user(username) 5669 if not user: 5670 return None 5671 5672 # Sanitize entry ID 5673 from .feed_parser import FeedParser 5674 parser = FeedParser() 5675 safe_id = parser.sanitize_entry_id(entry_id) 5676 5677 entry_path = self.repo_path / user.directory / f"{safe_id}.json" 5678 if not entry_path.exists(): 5679 return None 5680 5681 with open(entry_path) as f: 5682 data = json.load(f) 5683 5684 return AtomEntry(**data) 5685 5686 def list_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]: 5687 """List entries for a user.""" 5688 user = self.get_user(username) 5689 if not user: 5690 return [] 5691 5692 user_dir = self.repo_path / user.directory 5693 if not user_dir.exists(): 5694 return [] 5695 5696 entries = [] 5697 entry_files = sorted(user_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True) 5698 5699 5700 if limit: 5701 entry_files = entry_files[:limit] 5702 5703 for entry_file in entry_files: 5704 try: 5705 with open(entry_file) as f: 5706 data = json.load(f) 5707 entries.append(AtomEntry(**data)) 5708 except Exception: 5709 # Skip invalid entries 5710 continue 5711 5712 return entries 5713 5714 def get_duplicates(self) -> DuplicateMap: 5715 """Get the duplicates map.""" 5716 return self._load_duplicates() 5717 5718 def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None: 5719 """Add a 
duplicate mapping.""" 5720 duplicates = self._load_duplicates() 5721 duplicates.add_duplicate(duplicate_id, canonical_id) 5722 self._save_duplicates(duplicates) 5723 5724 def remove_duplicate(self, duplicate_id: str) -> bool: 5725 """Remove a duplicate mapping.""" 5726 duplicates = self._load_duplicates() 5727 result = duplicates.remove_duplicate(duplicate_id) 5728 self._save_duplicates(duplicates) 5729 return result 5730 5731 def commit_changes(self, message: str) -> None: 5732 """Commit all changes to the Git repository.""" 5733 if not self.repo: 5734 return 5735 5736 # Add all changes 5737 self.repo.git.add(A=True) 5738 5739 # Check if there are changes to commit 5740 if self.repo.index.diff("HEAD"): 5741 self.repo.index.commit(message) 5742 5743 def get_stats(self) -> dict: 5744 """Get statistics about the Git store.""" 5745 index = self._load_index() 5746 duplicates = self._load_duplicates() 5747 5748 return { 5749 "total_users": len(index.users), 5750 "total_entries": index.total_entries, 5751 "total_duplicates": len(duplicates.duplicates), 5752 "last_updated": index.last_updated, 5753 "repository_size": sum(f.stat().st_size for f in self.repo_path.rglob("*") if f.is_file()), 5754 } 5755 5756 def search_entries(self, query: str, username: Optional[str] = None, 5757 limit: Optional[int] = None) -> list[tuple[str, AtomEntry]]: 5758 """Search entries by content.""" 5759 results = [] 5760 5761 # Get users to search 5762 index = self._load_index() 5763 users = [index.get_user(username)] if username else list(index.users.values()) 5764 users = [u for u in users if u is not None] 5765 5766 for user in users: 5767 user_dir = self.repo_path / user.directory 5768 if not user_dir.exists(): 5769 continue 5770 5771 entry_files = user_dir.glob("*.json") 5772 5773 for entry_file in entry_files: 5774 try: 5775 with open(entry_file) as f: 5776 data = json.load(f) 5777 5778 entry = AtomEntry(**data) 5779 5780 # Simple text search in title, summary, and content 5781 
searchable_text = " ".join(filter(None, [ 5782 entry.title, 5783 entry.summary or "", 5784 entry.content or "", 5785 ])).lower() 5786 5787 if query.lower() in searchable_text: 5788 results.append((user.username, entry)) 5789 5790 if limit and len(results) >= limit: 5791 return results 5792 5793 except Exception: 5794 # Skip invalid entries 5795 continue 5796 5797 # Sort by updated time (newest first) 5798 results.sort(key=lambda x: x[1].updated, reverse=True) 5799 5800 return results[:limit] if limit else results 5801</file> 5802 5803<file path="ARCH.md"> 5804# Thicket Architecture Design 5805 5806## Overview 5807Thicket is a modern CLI tool for persisting Atom/RSS feeds in a Git repository, designed to enable distributed webblog comment structures. 5808 5809## Technology Stack 5810 5811### Core Libraries 5812 5813#### CLI Framework 5814- **Typer** (0.15.x) - Modern CLI framework with type hints 5815- **Rich** (13.x) - Beautiful terminal output, progress bars, and tables 5816- **prompt-toolkit** - Interactive prompts when needed 5817 5818#### Feed Processing 5819- **feedparser** (6.0.11) - Universal feed parser supporting RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 5820 - Alternative: **atoma** for stricter Atom/RSS parsing with JSON feed support 5821 - Alternative: **fastfeedparser** for high-performance parsing (10x faster) 5822 5823#### Git Integration 5824- **GitPython** (3.1.44) - High-level git operations, requires git CLI 5825 - Alternative: **pygit2** (1.18.0) - Direct libgit2 bindings, better for authentication 5826 5827#### HTTP Client 5828- **httpx** (0.28.x) - Modern async/sync HTTP client with connection pooling 5829- **aiohttp** (3.11.x) - For async-only operations if needed 5830 5831#### Configuration & Data Models 5832- **pydantic** (2.11.x) - Data validation and settings management 5833- **pydantic-settings** (2.10.x) - Configuration file handling with env var support 5834 5835#### Utilities 5836- **pendulum** (3.x) - Better datetime 
handling 5837- **bleach** (6.x) - HTML sanitization for feed content 5838- **platformdirs** (4.x) - Cross-platform directory paths 5839 5840## Project Structure 5841 5842``` 5843thicket/ 5844├── pyproject.toml # Modern Python packaging 5845├── README.md # Project documentation 5846├── ARCH.md # This file 5847├── CLAUDE.md # Project instructions 5848├── .gitignore 5849├── src/ 5850│ └── thicket/ 5851│ ├── __init__.py 5852│ ├── __main__.py # Entry point for `python -m thicket` 5853│ ├── cli/ # CLI commands and interface 5854│ │ ├── __init__.py 5855│ │ ├── main.py # Main CLI app with Typer 5856│ │ ├── commands/ # Subcommands 5857│ │ │ ├── __init__.py 5858│ │ │ ├── init.py # Initialize git store 5859│ │ │ ├── add.py # Add users and feeds 5860│ │ │ ├── sync.py # Sync feeds 5861│ │ │ ├── list_cmd.py # List users/feeds 5862│ │ │ ├── duplicates.py # Manage duplicate entries 5863│ │ │ ├── links_cmd.py # Extract and categorize links 5864│ │ │ └── index_cmd.py # Build reference index and show threads 5865│ │ └── utils.py # CLI utilities (progress, formatting) 5866│ ├── core/ # Core business logic 5867│ │ ├── __init__.py 5868│ │ ├── feed_parser.py # Feed parsing and normalization 5869│ │ ├── git_store.py # Git repository operations 5870│ │ └── reference_parser.py # Link extraction and threading 5871│ ├── models/ # Pydantic data models 5872│ │ ├── __init__.py 5873│ │ ├── config.py # Configuration models 5874│ │ ├── feed.py # Feed/Entry models 5875│ │ └── user.py # User metadata models 5876│ └── utils/ # Shared utilities 5877│ └── __init__.py 5878├── tests/ 5879│ ├── __init__.py 5880│ ├── conftest.py # pytest configuration 5881│ ├── test_feed_parser.py 5882│ ├── test_git_store.py 5883│ └── fixtures/ # Test data 5884│ └── feeds/ 5885└── docs/ 5886 └── examples/ # Example configurations 5887``` 5888 5889## Data Models 5890 5891### Configuration File (YAML/TOML) 5892```python 5893class ThicketConfig(BaseSettings): 5894 git_store: Path # Git repository location 5895 cache_dir: Path 
# Cache directory 5896 users: list[UserConfig] 5897 5898 model_config = SettingsConfigDict( 5899 env_prefix="THICKET_", 5900 env_file=".env", 5901 yaml_file="thicket.yaml" 5902 ) 5903 5904class UserConfig(BaseModel): 5905 username: str 5906 feeds: list[HttpUrl] 5907 email: Optional[EmailStr] = None 5908 homepage: Optional[HttpUrl] = None 5909 icon: Optional[HttpUrl] = None 5910 display_name: Optional[str] = None 5911``` 5912 5913### Feed Storage Format 5914```python 5915class AtomEntry(BaseModel): 5916 id: str # Original Atom ID 5917 title: str 5918 link: HttpUrl 5919 updated: datetime 5920 published: Optional[datetime] 5921 summary: Optional[str] 5922 content: Optional[str] # Full body content from Atom entry 5923 content_type: Optional[str] = "html" # text, html, xhtml 5924 author: Optional[dict] 5925 categories: list[str] = [] 5926 rights: Optional[str] = None # Copyright info 5927 source: Optional[str] = None # Source feed URL 5928 # Additional Atom fields preserved during RSS->Atom conversion 5929 5930 model_config = ConfigDict( 5931 json_encoders={ 5932 datetime: lambda v: v.isoformat() 5933 } 5934 ) 5935 5936class DuplicateMap(BaseModel): 5937 """Maps duplicate entry IDs to canonical entry IDs""" 5938 duplicates: dict[str, str] = {} # duplicate_id -> canonical_id 5939 comment: str = "Entry IDs that map to the same canonical content" 5940 5941 def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None: 5942 """Add a duplicate mapping""" 5943 self.duplicates[duplicate_id] = canonical_id 5944 5945 def remove_duplicate(self, duplicate_id: str) -> bool: 5946 """Remove a duplicate mapping. 
Returns True if existed.""" 5947 return self.duplicates.pop(duplicate_id, None) is not None 5948 5949 def get_canonical(self, entry_id: str) -> str: 5950 """Get canonical ID for an entry (returns original if not duplicate)""" 5951 return self.duplicates.get(entry_id, entry_id) 5952 5953 def is_duplicate(self, entry_id: str) -> bool: 5954 """Check if entry ID is marked as duplicate""" 5955 return entry_id in self.duplicates 5956``` 5957 5958## Git Repository Structure 5959``` 5960git-store/ 5961├── index.json # User directory index 5962├── duplicates.json # Manual curation of duplicate entries 5963├── links.json # Unified links, references, and mapping data 5964├── user1/ 5965│ ├── entry_id_1.json # Sanitized entry files 5966│ ├── entry_id_2.json 5967│ └── ... 5968└── user2/ 5969 └── ... 5970``` 5971 5972## Key Design Decisions 5973 5974### 1. Feed Normalization & Auto-Discovery 5975- All RSS feeds converted to Atom format before storage 5976- Preserves maximum metadata during conversion 5977- Sanitizes HTML content to prevent XSS 5978- **Auto-discovery**: Extracts user metadata from feed during `add user` command 5979 5980### 2. ID Sanitization 5981- Consistent algorithm to convert Atom IDs to safe filenames 5982- Handles edge cases (very long IDs, special characters) 5983- Maintains reversibility where possible 5984 5985### 3. Git Operations 5986- Uses GitPython for simplicity (no authentication required) 5987- Single main branch for all users and entries 5988- Atomic commits per sync operation 5989- Meaningful commit messages with feed update summaries 5990- Preserves complete history - never delete entries even if they disappear from feeds 5991 5992### 4. Caching Strategy 5993- HTTP caching with Last-Modified/ETag support 5994- Local cache of parsed feeds with TTL 5995- Cache invalidation on configuration changes 5996- Git store serves as permanent historical archive beyond feed depth limits 5997 5998### 5. 
Error Handling 5999- Graceful handling of feed parsing errors 6000- Retry logic for network failures 6001- Clear error messages with recovery suggestions 6002 6003## CLI Command Structure 6004 6005```bash 6006# Initialize a new git store 6007thicket init /path/to/store 6008 6009# Add a user with feeds (auto-discovers metadata from feed) 6010thicket add user "alyssa" \ 6011 --feed "https://example.com/feed.atom" 6012 # Auto-populates: email, homepage, icon, display_name from feed metadata 6013 6014# Add a user with manual overrides 6015thicket add user "alyssa" \ 6016 --feed "https://example.com/feed.atom" \ 6017 --email "alyssa@example.com" \ 6018 --homepage "https://alyssa.example.com" \ 6019 --icon "https://example.com/avatar.png" \ 6020 --display-name "Alyssa P. Hacker" 6021 6022# Add additional feed to existing user 6023thicket add feed "alyssa" "https://example.com/other-feed.rss" 6024 6025# Sync all feeds (designed for cron usage) 6026thicket sync --all 6027 6028# Sync specific user 6029thicket sync --user alyssa 6030 6031# List users and their feeds 6032thicket list users 6033thicket list feeds --user alyssa 6034 6035# Manage duplicate entries 6036thicket duplicates list 6037thicket duplicates add <entry_id_1> <entry_id_2> # Mark as duplicates 6038thicket duplicates remove <entry_id_1> <entry_id_2> # Unmark duplicates 6039 6040# Link processing and threading 6041thicket links --verbose # Extract and categorize all links 6042thicket index --verbose # Build reference index for threading 6043thicket threads # Show conversation threads 6044thicket threads --username user1 # Show threads for specific user 6045thicket threads --min-size 3 # Show threads with minimum size 6046``` 6047 6048## Performance Considerations 6049 60501. **Concurrent Feed Fetching**: Use httpx with asyncio for parallel downloads 60512. **Incremental Updates**: Only fetch/parse feeds that have changed 60523. 
**Efficient Git Operations**: Batch commits, use shallow clones where appropriate 60534. **Progress Feedback**: Rich progress bars for long operations 6054 6055## Security Considerations 6056 60571. **HTML Sanitization**: Use bleach to clean feed content 60582. **URL Validation**: Strict validation of feed URLs 60593. **Git Security**: No credentials stored in repository 60604. **Path Traversal**: Careful sanitization of filenames 6061 6062## Future Enhancements 6063 60641. **Web Interface**: Optional web UI for browsing the git store 60652. **Webhooks**: Notify external services on feed updates 60663. **Feed Discovery**: Auto-discover feeds from HTML pages 60674. **Export Formats**: Generate static sites, OPML exports 60685. **Federation**: P2P sync between thicket instances 6069 6070## Requirements Clarification 6071 6072**✓ Resolved Requirements:** 60731. **Feed Update Frequency**: Designed for cron usage - no built-in scheduling needed 60742. **Duplicate Handling**: Manual curation via `duplicates.json` file with CLI commands 60753. **Git Branching**: Single main branch for all users and entries 60764. **Authentication**: No feeds require authentication currently 60775. **Content Storage**: Store complete Atom entry body content as provided 60786. **Deleted Entries**: Preserve all entries in Git store permanently (historical archive) 60797. **History Depth**: Git store maintains full history beyond feed depth limits 60808. 
**Feed Auto-Discovery**: Extract user metadata from feed during `add user` command 6081 6082## Duplicate Entry Management 6083 6084### Duplicate Detection Strategy 6085- **Manual Curation**: Duplicates identified and managed manually via CLI 6086- **Storage**: `duplicates.json` file in Git root maps entry IDs to canonical entries 6087- **Structure**: `{"duplicate_id": "canonical_id", ...}` 6088- **CLI Commands**: Add/remove duplicate mappings with validation 6089- **Query Resolution**: Search/list commands resolve duplicates to canonical entries 6090 6091### Duplicate File Format 6092```json 6093{ 6094 "https://example.com/feed/entry/123": "https://canonical.com/posts/same-post", 6095 "https://mirror.com/articles/456": "https://canonical.com/posts/same-post", 6096 "comment": "Entry IDs that map to the same canonical content" 6097} 6098``` 6099 6100## Feed Metadata Auto-Discovery 6101 6102### Extraction Strategy 6103When adding a new user with `thicket add user`, the system fetches and parses the feed to extract: 6104 6105- **Display Name**: From `feed.title` or `feed.author.name` 6106- **Email**: From `feed.author.email` or `feed.managingEditor` 6107- **Homepage**: From `feed.link` or `feed.author.uri` 6108- **Icon**: From `feed.logo`, `feed.icon`, or `feed.image.url` 6109 6110### Discovery Priority Order 61111. **Author Information**: Prefer `feed.author.*` fields (more specific to person) 61122. **Feed-Level**: Fall back to feed-level metadata 61133. **Manual Override**: CLI flags always take precedence over discovered values 61144. 
**Update Behavior**: Auto-discovery only runs during initial `add user`, not on sync 6115 6116### Extracted Metadata Format 6117```python 6118class FeedMetadata(BaseModel): 6119 title: Optional[str] = None 6120 author_name: Optional[str] = None 6121 author_email: Optional[EmailStr] = None 6122 author_uri: Optional[HttpUrl] = None 6123 link: Optional[HttpUrl] = None 6124 logo: Optional[HttpUrl] = None 6125 icon: Optional[HttpUrl] = None 6126 image_url: Optional[HttpUrl] = None 6127 6128 def to_user_config(self, username: str, feed_url: HttpUrl) -> UserConfig: 6129 """Convert discovered metadata to UserConfig with fallbacks""" 6130 return UserConfig( 6131 username=username, 6132 feeds=[feed_url], 6133 display_name=self.author_name or self.title, 6134 email=self.author_email, 6135 homepage=self.author_uri or self.link, 6136 icon=self.logo or self.icon or self.image_url 6137 ) 6138``` 6139 6140## Link Processing and Threading Architecture 6141 6142### Overview 6143The thicket system implements a sophisticated link processing and threading system to create email-style threaded views of blog entries by tracking cross-references between different blogs. 6144 6145### Link Processing Pipeline 6146 6147#### 1. Link Extraction (`thicket links`) 6148The `links` command systematically extracts all outbound links from blog entries and categorizes them: 6149 6150```python 6151class LinkData(BaseModel): 6152 url: str # Fully resolved URL 6153 entry_id: str # Source entry ID 6154 username: str # Source username 6155 context: str # Surrounding text context 6156 category: str # "internal", "user", or "unknown" 6157 target_username: Optional[str] # Target user if applicable 6158``` 6159 6160**Link Categories:** 6161- **Internal**: Links to the same user's domain (self-references) 6162- **User**: Links to other tracked users' domains 6163- **Unknown**: Links to external sites not tracked by thicket 6164 6165#### 2. 
URL Resolution 6166All links are properly resolved using the Atom feed's base URL to handle: 6167- Relative URLs (converted to absolute) 6168- Protocol-relative URLs 6169- Fragment identifiers 6170- Redirects and canonical URLs 6171 6172#### 3. Domain Mapping 6173The system builds a comprehensive domain mapping from user configuration: 6174- Feed URLs → domain extraction 6175- Homepage URLs → domain extraction 6176- Reverse mapping: domain → username 6177 6178### Threading System 6179 6180#### 1. Reference Index Generation (`thicket index`) 6181Creates a bidirectional reference index from the categorized links: 6182 6183```python 6184class BlogReference(BaseModel): 6185 source_entry_id: str 6186 source_username: str 6187 target_url: str 6188 target_username: Optional[str] 6189 target_entry_id: Optional[str] 6190 context: str 6191``` 6192 6193#### 2. Thread Detection Algorithm 6194Uses graph traversal to find connected blog entries: 6195- **Outbound references**: Links from an entry to other entries 6196- **Inbound references**: Links to an entry from other entries 6197- **Thread members**: All entries connected through references 6198 6199#### 3. 
Threading Display (`thicket threads`) 6200Creates email-style threaded views: 6201- Chronological ordering within threads 6202- Reference counts (outbound/inbound) 6203- Context preservation 6204- Filtering options (user, entry, minimum size) 6205 6206### Data Structures 6207 6208#### links.json Format (Unified Structure) 6209```json 6210{ 6211 "links": { 6212 "https://example.com/post/123": { 6213 "referencing_entries": ["https://blog.user.com/entry/456"], 6214 "target_username": "user2" 6215 }, 6216 "https://external-site.com/article": { 6217 "referencing_entries": ["https://blog.user.com/entry/789"] 6218 } 6219 }, 6220 "reverse_mapping": { 6221 "https://blog.user.com/entry/456": ["https://example.com/post/123"], 6222 "https://blog.user.com/entry/789": ["https://external-site.com/article"] 6223 }, 6224 "references": [ 6225 { 6226 "source_entry_id": "https://blog.user.com/entry/456", 6227 "source_username": "user1", 6228 "target_url": "https://example.com/post/123", 6229 "target_username": "user2", 6230 "target_entry_id": "https://example.com/post/123", 6231 "context": "As mentioned in this post..." 6232 } 6233 ], 6234 "user_domains": { 6235 "user1": ["blog.user.com"], 6236 "user2": ["example.com"] 6237 } 6238} 6239``` 6240 6241This unified structure eliminates duplication by: 6242- Storing each URL only once with minimal metadata 6243- Including all link data, reference data, and mappings in one file 6244- Using presence of `target_username` to identify tracked vs external links 6245- Providing bidirectional mappings for efficient queries 6246 6247### Unified Structure Benefits 6248 6249- **Eliminates Duplication**: Each URL appears only once with metadata 6250- **Single Source of Truth**: All link-related data in one file 6251- **Efficient Queries**: Fast lookups for both directions (URL→entries, entry→URLs) 6252- **Atomic Updates**: All link data changes together 6253- **Reduced I/O**: Fewer file operations 6254 6255### Implementation Benefits 6256 62571. 
**Systematic Link Processing**: All links are extracted and categorized consistently 62582. **Proper URL Resolution**: Handles relative URLs and base URL resolution correctly 62593. **Domain-based Categorization**: Automatically identifies user-to-user references 62604. **Bidirectional Indexing**: Supports both "who links to whom" and "who is linked by whom" 62615. **Thread Discovery**: Finds conversation threads automatically 62626. **Rich Context**: Preserves surrounding text for each link 62637. **Performance**: Pre-computed indexes for fast threading queries 6264 6265### CLI Commands 6266 6267```bash 6268# Extract and categorize all links 6269thicket links --verbose 6270 6271# Build reference index for threading 6272thicket index --verbose 6273 6274# Show all conversation threads 6275thicket threads 6276 6277# Show threads for specific user 6278thicket threads --username user1 6279 6280# Show threads with minimum size 6281thicket threads --min-size 3 6282``` 6283 6284### Integration with Existing Commands 6285 6286The link processing system integrates seamlessly with existing thicket commands: 6287- `thicket sync` updates entries, requiring `thicket links` to be run afterward 6288- `thicket index` uses the output from `thicket links` for improved accuracy 6289- `thicket threads` provides the user-facing threading interface 6290 6291## Current Implementation Status 6292 6293### ✅ Completed Features 62941. **Core Infrastructure** 6295 - Modern CLI with Typer and Rich 6296 - Pydantic data models for type safety 6297 - Git repository operations with GitPython 6298 - Feed parsing and normalization with feedparser 6299 63002. **User and Feed Management** 6301 - `thicket init` - Initialize git store 6302 - `thicket add` - Add users and feeds with auto-discovery 6303 - `thicket sync` - Sync feeds with progress tracking 6304 - `thicket list` - List users, feeds, and entries 6305 - `thicket duplicates` - Manage duplicate entries 6306 63073. 
**Link Processing and Threading** 6308 - `thicket links` - Extract and categorize all outbound links 6309 - `thicket index` - Build reference index from links 6310 - `thicket threads` - Display threaded conversation views 6311 - Proper URL resolution with base URL handling 6312 - Domain-based link categorization 6313 - Context preservation for links 6314 6315### 📊 System Performance 6316- **Link Extraction**: Successfully processes thousands of blog entries 6317- **Categorization**: Identifies internal, user, and unknown links 6318- **Threading**: Creates email-style threaded views of conversations 6319- **Storage**: Efficient JSON-based data structures for links and references 6320 6321### 🔧 Current Architecture Highlights 6322- **Modular Design**: Clear separation between CLI, core logic, and models 6323- **Type Safety**: Comprehensive Pydantic models for data validation 6324- **Rich CLI**: Beautiful progress bars, tables, and error handling 6325- **Extensible**: Easy to add new commands and features 6326- **Git Integration**: All data stored in version-controlled JSON files 6327 6328### 🎯 Proven Functionality 6329The system has been tested with real blog data and successfully: 6330- Extracted 14,396 total links from blog entries 6331- Categorized 3,994 internal links, 363 user-to-user links, and 10,039 unknown links 6332- Built comprehensive domain mappings for 16 users across 20 domains 6333- Generated threaded views showing blog conversation patterns 6334 6335### 🚀 Ready for Use 6336The thicket system is now fully functional for: 6337- Maintaining Git repositories of blog feeds 6338- Tracking cross-references between blogs 6339- Creating threaded views of blog conversations 6340- Discovering blog interaction patterns 6341- Building distributed comment systems 6342</file> 6343 6344<file path="src/thicket/cli/utils.py"> 6345"""CLI utilities and helpers.""" 6346 6347from pathlib import Path 6348from typing import Optional 6349 6350import typer 6351from 
console = Console()


def get_tsv_mode() -> bool:
    """Return the global ``--tsv`` flag set by the main CLI callback."""
    # Imported lazily to avoid a circular import between cli.main and
    # cli.utils at module load time.
    from .main import tsv_mode
    return tsv_mode


def _read_config_file(path: Path) -> ThicketConfig:
    """Parse a YAML configuration file into a ThicketConfig.

    Shared helper so the explicit-path and default-path branches of
    load_config() do not duplicate the read/parse/construct sequence.
    """
    import yaml

    with open(path) as f:
        config_data = yaml.safe_load(f)

    return ThicketConfig(**config_data)


def load_config(config_path: Optional[Path] = None) -> ThicketConfig:
    """Load thicket configuration from file or environment.

    Resolution order:
      1. An explicit ``config_path`` (parse errors propagate to the caller,
         matching the original behavior).
      2. ``thicket.yaml`` in the current directory.
      3. Environment variables via the ThicketConfig settings model.

    Raises:
        typer.Exit: when the fallback locations fail to produce a config.
    """
    if config_path and config_path.exists():
        return _read_config_file(config_path)

    try:
        # First try to find thicket.yaml in the current directory.
        default_config = Path("thicket.yaml")
        if default_config.exists():
            return _read_config_file(default_config)

        # Fall back to environment variables.
        return ThicketConfig()
    except Exception as e:
        console.print(f"[red]Error loading configuration: {e}[/red]")
        console.print("[yellow]Run 'thicket init' to create a new configuration.[/yellow]")
        raise typer.Exit(1) from e


def save_config(config: ThicketConfig, config_path: Path) -> None:
    """Write the configuration to ``config_path`` as YAML."""
    import yaml

    config_data = config.model_dump(mode="json", exclude_none=True)

    # Path objects are not YAML-serializable; store them as plain strings.
    config_data["git_store"] = str(config_data["git_store"])
    config_data["cache_dir"] = str(config_data["cache_dir"])

    with open(config_path, "w") as f:
        yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)


def create_progress() -> Progress:
    """Create a transient Rich progress display (spinner + description)."""
    return Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
        transient=True,
    )


def print_users_table(config: ThicketConfig) -> None:
    """Print a table of users and their feeds from the config."""
    if get_tsv_mode():
        print_users_tsv(config)
        return

    table = Table(title="Users and Feeds")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Display Name", style="magenta")
    table.add_column("Email", style="blue")
    table.add_column("Homepage", style="green")
    table.add_column("Feeds", style="yellow")

    for user in config.users:
        # One feed URL per line within the cell.
        feeds_str = "\n".join(str(feed) for feed in user.feeds)
        table.add_row(
            user.username,
            user.display_name or "",
            user.email or "",
            str(user.homepage) if user.homepage else "",
            feeds_str,
        )

    console.print(table)


def print_feeds_table(config: ThicketConfig, username: Optional[str] = None) -> None:
    """Print a table of feeds, optionally filtered by username."""
    if get_tsv_mode():
        print_feeds_tsv(config, username)
        return

    table = Table(title=f"Feeds{f' for {username}' if username else ''}")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Feed URL", style="blue")
    table.add_column("Status", style="green")

    users = [config.find_user(username)] if username else config.users
    users = [u for u in users if u is not None]

    for user in users:
        for feed in user.feeds:
            table.add_row(
                user.username,
                str(feed),
                "Active",  # TODO: Add actual status checking
            )

    console.print(table)


def confirm_action(message: str, default: bool = False) -> bool:
    """Prompt the user for a yes/no confirmation."""
    return typer.confirm(message, default=default)


def print_success(message: str) -> None:
    """Print a success message."""
    console.print(f"[green]✓[/green] {message}")


def print_error(message: str) -> None:
    """Print an error message."""
    console.print(f"[red]✗[/red] {message}")


def print_warning(message: str) -> None:
    """Print a warning message."""
    console.print(f"[yellow]⚠[/yellow] {message}")


def print_info(message: str) -> None:
    """Print an info message."""
    console.print(f"[blue]ℹ[/blue] {message}")


def print_users_table_from_git(users: list[UserMetadata]) -> None:
    """Print a table of users sourced from the git repository index."""
    if get_tsv_mode():
        print_users_tsv_from_git(users)
        return

    table = Table(title="Users and Feeds")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Display Name", style="magenta")
    table.add_column("Email", style="blue")
    table.add_column("Homepage", style="green")
    table.add_column("Feeds", style="yellow")

    for user in users:
        feeds_str = "\n".join(user.feeds)
        table.add_row(
            user.username,
            user.display_name or "",
            user.email or "",
            user.homepage or "",
            feeds_str,
        )

    console.print(table)
user.username, 6544 feed, 6545 "Active", # TODO: Add actual status checking 6546 ) 6547 6548 console.print(table) 6549 6550 6551def print_users_tsv(config: ThicketConfig) -> None: 6552 """Print users in TSV format.""" 6553 print("Username\tDisplay Name\tEmail\tHomepage\tFeeds") 6554 for user in config.users: 6555 feeds_str = ",".join(str(feed) for feed in user.feeds) 6556 print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}") 6557 6558 6559def print_users_tsv_from_git(users: list[UserMetadata]) -> None: 6560 """Print users from git repository in TSV format.""" 6561 print("Username\tDisplay Name\tEmail\tHomepage\tFeeds") 6562 for user in users: 6563 feeds_str = ",".join(user.feeds) 6564 print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}") 6565 6566 6567def print_feeds_tsv(config: ThicketConfig, username: Optional[str] = None) -> None: 6568 """Print feeds in TSV format.""" 6569 print("Username\tFeed URL\tStatus") 6570 users = [config.find_user(username)] if username else config.users 6571 users = [u for u in users if u is not None] 6572 6573 for user in users: 6574 for feed in user.feeds: 6575 print(f"{user.username}\t{feed}\tActive") 6576 6577 6578def print_feeds_tsv_from_git(git_store: GitStore, username: Optional[str] = None) -> None: 6579 """Print feeds from git repository in TSV format.""" 6580 print("Username\tFeed URL\tStatus") 6581 6582 if username: 6583 user = git_store.get_user(username) 6584 users = [user] if user else [] 6585 else: 6586 index = git_store._load_index() 6587 users = list(index.users.values()) 6588 6589 for user in users: 6590 for feed in user.feeds: 6591 print(f"{user.username}\t{feed}\tActive") 6592 6593 6594def print_entries_tsv(entries_by_user: list[list], usernames: list[str]) -> None: 6595 """Print entries in TSV format.""" 6596 print("User\tAtom ID\tTitle\tUpdated\tURL") 6597 6598 # Combine all entries with usernames 6599 
all_entries = [] 6600 for entries, username in zip(entries_by_user, usernames): 6601 for entry in entries: 6602 all_entries.append((username, entry)) 6603 6604 # Sort by updated time (newest first) 6605 all_entries.sort(key=lambda x: x[1].updated, reverse=True) 6606 6607 for username, entry in all_entries: 6608 # Format updated time 6609 updated_str = entry.updated.strftime("%Y-%m-%d %H:%M") 6610 6611 # Escape tabs and newlines in title to preserve TSV format 6612 title = entry.title.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ') 6613 6614 print(f"{username}\t{entry.id}\t{title}\t{updated_str}\t{entry.link}") 6615</file> 6616 6617</files>