Manage Atom feeds in a persistent git repository

+26 -25
ARCH.md
···
git-store/
├── index.json # User directory index
├── duplicates.json # Manual curation of duplicate entries
-├── links.json # All outbound links categorized by type
-├── references.json # Cross-reference index for threading
+├── links.json # Unified links, references, and mapping data
├── user1/
│ ├── entry_id_1.json # Sanitized entry files
│ ├── entry_id_2.json
···
### Data Structures
-#### links.json Format
+#### links.json Format (Unified Structure)
```json
{
-  "links": [
-    {
-      "url": "https://example.com/post/123",
-      "entry_id": "https://blog.user.com/entry/456",
-      "username": "user1",
-      "context": "As mentioned in this post...",
-      "category": "user",
+  "links": {
+    "https://example.com/post/123": {
+      "referencing_entries": ["https://blog.user.com/entry/456"],
      "target_username": "user2"
+    },
+    "https://external-site.com/article": {
+      "referencing_entries": ["https://blog.user.com/entry/789"]
    }
-  ],
-  "categories": {
-    "internal": 1234,
-    "user": 456,
-    "unknown": 7890
  },
-  "user_domains": {
-    "user1": ["blog.user.com", "user.com"],
-    "user2": ["example.com"]
-  }
-}
-```
-
-#### references.json Format
-
-```json
-{
+  "reverse_mapping": {
+    "https://blog.user.com/entry/456": ["https://example.com/post/123"],
+    "https://blog.user.com/entry/789": ["https://external-site.com/article"]
+  },
  "references": [
    {
      "source_entry_id": "https://blog.user.com/entry/456",
···
  }
}
```
+
+This unified structure eliminates duplication by:
+
+- Storing each URL only once with minimal metadata
+- Including all link data, reference data, and mappings in one file
+- Using the presence of `target_username` to identify tracked vs. external links
+- Providing bidirectional mappings for efficient queries (see the sketch below)
+
+### Unified Structure Benefits
+
+- **Eliminates Duplication**: Each URL appears only once with its metadata
+- **Single Source of Truth**: All link-related data lives in one file
+- **Efficient Queries**: Fast lookups in both directions (URL → entries, entry → URLs)
+- **Atomic Updates**: All link data changes together
+- **Reduced I/O**: Fewer file operations
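A minimal sketch of how a consumer could query the unified file (the layout matches the example above; `load_links` and the `git-store` path are illustrative, not part of the codebase):

```python
import json
from pathlib import Path


def load_links(git_store: Path) -> dict:
    """Load the unified link index from the git store (hypothetical helper)."""
    with open(git_store / "links.json") as f:
        return json.load(f)


data = load_links(Path("git-store"))

# Forward lookup: which entries reference this URL?
info = data["links"].get("https://example.com/post/123", {})
print(info.get("referencing_entries", []))

# A link points at a tracked user exactly when target_username is present.
print("tracked" if "target_username" in info else "external")

# Reverse lookup: which URLs does this entry link out to?
print(data["reverse_mapping"].get("https://blog.user.com/entry/456", []))
```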
### Implementation Benefits
+6 -5
pyproject.toml
···
"bleach>=6.0.0",
"platformdirs>=4.0.0",
"pyyaml>=6.0.0",
-
"email_validator"
+
"email_validator",
]
[project.optional-dependencies]
···
"-ra",
"--strict-markers",
"--strict-config",
-
"--cov=src/thicket",
-
"--cov-report=term-missing",
-
"--cov-report=html",
-
"--cov-report=xml",
]
filterwarnings = [
"error",
···
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod",
]
+
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+]
+2 -2
src/thicket/cli/commands/__init__.py
···
"""CLI commands for thicket."""
# Import all commands to register them with the main app
-
from . import add, duplicates, index_cmd, info_cmd, init, links_cmd, list_cmd, sync
+
from . import add, duplicates, info_cmd, init, list_cmd, sync
-
__all__ = ["add", "duplicates", "index_cmd", "info_cmd", "init", "links_cmd", "list_cmd", "sync"]
+
__all__ = ["add", "duplicates", "info_cmd", "init", "list_cmd", "sync"]
+44 -9
src/thicket/cli/commands/add.py
···
def add_command(
    subcommand: str = typer.Argument(..., help="Subcommand: 'user' or 'feed'"),
    username: str = typer.Argument(..., help="Username"),
-   feed_url: Optional[str] = typer.Argument(None, help="Feed URL (required for 'user' command)"),
+   feed_url: Optional[str] = typer.Argument(
+       None, help="Feed URL (required for 'user' command)"
+   ),
    email: Optional[str] = typer.Option(None, "--email", "-e", help="User email"),
-   homepage: Optional[str] = typer.Option(None, "--homepage", "-h", help="User homepage"),
+   homepage: Optional[str] = typer.Option(
+       None, "--homepage", "-h", help="User homepage"
+   ),
    icon: Optional[str] = typer.Option(None, "--icon", "-i", help="User icon URL"),
-   display_name: Optional[str] = typer.Option(None, "--display-name", "-d", help="User display name"),
+   display_name: Optional[str] = typer.Option(
+       None, "--display-name", "-d", help="User display name"
+   ),
    config_file: Optional[Path] = typer.Option(
        Path("thicket.yaml"), "--config", help="Configuration file path"
    ),
    auto_discover: bool = typer.Option(
-       True, "--auto-discover/--no-auto-discover", help="Auto-discover user metadata from feed"
+       True,
+       "--auto-discover/--no-auto-discover",
+       help="Auto-discover user metadata from feed",
    ),
) -> None:
    """Add a user or feed to thicket."""
    if subcommand == "user":
-       add_user(username, feed_url, email, homepage, icon, display_name, config_file, auto_discover)
+       add_user(
+           username,
+           feed_url,
+           email,
+           homepage,
+           icon,
+           display_name,
+           config_file,
+           auto_discover,
+       )
    elif subcommand == "feed":
        add_feed(username, feed_url, config_file)
    else:
···
    discovered_metadata = asyncio.run(discover_feed_metadata(validated_feed_url))

    # Prepare user data with manual overrides taking precedence
-   user_display_name = display_name or (discovered_metadata.author_name or discovered_metadata.title if discovered_metadata else None)
-   user_email = email or (discovered_metadata.author_email if discovered_metadata else None)
-   user_homepage = homepage or (str(discovered_metadata.author_uri or discovered_metadata.link) if discovered_metadata else None)
-   user_icon = icon or (str(discovered_metadata.logo or discovered_metadata.icon or discovered_metadata.image_url) if discovered_metadata else None)
+   user_display_name = display_name or (
+       discovered_metadata.author_name or discovered_metadata.title
+       if discovered_metadata
+       else None
+   )
+   user_email = email or (
+       discovered_metadata.author_email if discovered_metadata else None
+   )
+   user_homepage = homepage or (
+       str(discovered_metadata.author_uri or discovered_metadata.link)
+       if discovered_metadata
+       else None
+   )
+   user_icon = icon or (
+       str(
+           discovered_metadata.logo
+           or discovered_metadata.icon
+           or discovered_metadata.image_url
+       )
+       if discovered_metadata
+       else None
+   )

    # Add user to Git store
    git_store.add_user(
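The discovery fallbacks above all follow one precedence rule; a tiny self-contained sketch of it (the `pick` helper is illustrative, not part of the codebase):

```python
def pick(manual, discovered):
    # Explicit CLI value wins; discovered feed metadata is the fallback.
    return manual or discovered


assert pick("Alice", "Feed Title") == "Alice"    # manual override wins
assert pick(None, "Feed Title") == "Feed Title"  # fall back to discovery
assert pick(None, None) is None                  # nothing provided or discovered
```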
+7 -3
src/thicket/cli/commands/duplicates.py
···
from ..main import app
from ..utils import (
    console,
+   get_tsv_mode,
    load_config,
    print_error,
    print_info,
    print_success,
-   get_tsv_mode,
)
···
    print_info(f"Total duplicates: {len(duplicates.duplicates)}")

-def add_duplicate(git_store: GitStore, duplicate_id: Optional[str], canonical_id: Optional[str]) -> None:
+def add_duplicate(
+    git_store: GitStore, duplicate_id: Optional[str], canonical_id: Optional[str]
+) -> None:
    """Add a duplicate mapping."""
    if not duplicate_id:
        print_error("Duplicate ID is required")
···
    # Remove the mapping
    if git_store.remove_duplicate(duplicate_id):
        # Commit changes
-       git_store.commit_changes(f"Remove duplicate mapping: {duplicate_id} -> {canonical_id}")
+       git_store.commit_changes(
+           f"Remove duplicate mapping: {duplicate_id} -> {canonical_id}"
+       )
        print_success(f"Removed duplicate mapping: {duplicate_id} -> {canonical_id}")
    else:
        print_error(f"Failed to remove duplicate mapping: {duplicate_id}")
-396
src/thicket/cli/commands/index_cmd.py
···
-"""CLI command for building reference index from blog entries."""
-
-import json
-from pathlib import Path
-from typing import Optional
-
-import typer
-from rich.console import Console
-from rich.progress import (
-    BarColumn,
-    Progress,
-    SpinnerColumn,
-    TaskProgressColumn,
-    TextColumn,
-)
-from rich.table import Table
-
-from ...core.git_store import GitStore
-from ...core.reference_parser import ReferenceIndex, ReferenceParser
-from ..main import app
-from ..utils import get_tsv_mode, load_config
-
-console = Console()
-
-
-@app.command()
-def index(
-    config_file: Optional[Path] = typer.Option(
-        None,
-        "--config",
-        "-c",
-        help="Path to configuration file",
-    ),
-    output_file: Optional[Path] = typer.Option(
-        None,
-        "--output",
-        "-o",
-        help="Path to output index file (default: references.json in git store)",
-    ),
-    verbose: bool = typer.Option(
-        False,
-        "--verbose",
-        "-v",
-        help="Show detailed progress information",
-    ),
-) -> None:
-    """Build a reference index showing which blog entries reference others.
-
-    This command analyzes all blog entries to detect cross-references between
-    different blogs, creating an index that can be used to build threaded
-    views of related content.
-    """
-    try:
-        # Load configuration
-        config = load_config(config_file)
-
-        # Initialize Git store
-        git_store = GitStore(config.git_store)
-
-        # Initialize reference parser
-        parser = ReferenceParser()
-
-        # Build user domain mapping
-        if verbose:
-            console.print("Building user domain mapping...")
-        user_domains = parser.build_user_domain_mapping(git_store)
-
-        if verbose:
-            console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")
-
-        # Initialize reference index
-        ref_index = ReferenceIndex()
-        ref_index.user_domains = user_domains
-
-        # Get all users
-        index = git_store._load_index()
-        users = list(index.users.keys())
-
-        if not users:
-            console.print("[yellow]No users found in Git store[/yellow]")
-            raise typer.Exit(0)
-
-        # Process all entries
-        total_entries = 0
-        total_references = 0
-        all_references = []
-
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            BarColumn(),
-            TaskProgressColumn(),
-            console=console,
-        ) as progress:
-
-            # Count total entries first
-            counting_task = progress.add_task("Counting entries...", total=len(users))
-            entry_counts = {}
-            for username in users:
-                entries = git_store.list_entries(username)
-                entry_counts[username] = len(entries)
-                total_entries += len(entries)
-                progress.advance(counting_task)
-
-            progress.remove_task(counting_task)
-
-            # Process entries - extract references
-            processing_task = progress.add_task(
-                f"Extracting references from {total_entries} entries...",
-                total=total_entries
-            )
-
-            for username in users:
-                entries = git_store.list_entries(username)
-
-                for entry in entries:
-                    # Extract references from this entry
-                    references = parser.extract_references(entry, username, user_domains)
-                    all_references.extend(references)
-
-                    progress.advance(processing_task)
-
-                    if verbose and references:
-                        console.print(f"  Found {len(references)} references in {username}:{entry.title[:50]}...")
-
-            progress.remove_task(processing_task)
-
-            # Resolve target_entry_ids for references
-            if all_references:
-                resolve_task = progress.add_task(
-                    f"Resolving {len(all_references)} references...",
-                    total=len(all_references)
-                )
-
-                if verbose:
-                    console.print(f"Resolving target entry IDs for {len(all_references)} references...")
-
-                resolved_references = parser.resolve_target_entry_ids(all_references, git_store)
-
-                # Count resolved references
-                resolved_count = sum(1 for ref in resolved_references if ref.target_entry_id is not None)
-                if verbose:
-                    console.print(f"Resolved {resolved_count} out of {len(all_references)} references")
-
-                # Add resolved references to index
-                for ref in resolved_references:
-                    ref_index.add_reference(ref)
-                    total_references += 1
-                    progress.advance(resolve_task)
-
-                progress.remove_task(resolve_task)
-
-        # Determine output path
-        if output_file:
-            output_path = output_file
-        else:
-            output_path = config.git_store / "references.json"
-
-        # Save reference index
-        with open(output_path, "w") as f:
-            json.dump(ref_index.to_dict(), f, indent=2, default=str)
-
-        # Show summary
-        if not get_tsv_mode():
-            console.print("\n[green]✓ Reference index built successfully[/green]")
-
-        # Create summary table or TSV output
-        if get_tsv_mode():
-            print("Metric\tCount")
-            print(f"Total Users\t{len(users)}")
-            print(f"Total Entries\t{total_entries}")
-            print(f"Total References\t{total_references}")
-            print(f"Outbound Refs\t{len(ref_index.outbound_refs)}")
-            print(f"Inbound Refs\t{len(ref_index.inbound_refs)}")
-            print(f"Output File\t{output_path}")
-        else:
-            table = Table(title="Reference Index Summary")
-            table.add_column("Metric", style="cyan")
-            table.add_column("Count", style="green")
-
-            table.add_row("Total Users", str(len(users)))
-            table.add_row("Total Entries", str(total_entries))
-            table.add_row("Total References", str(total_references))
-            table.add_row("Outbound Refs", str(len(ref_index.outbound_refs)))
-            table.add_row("Inbound Refs", str(len(ref_index.inbound_refs)))
-            table.add_row("Output File", str(output_path))
-
-            console.print(table)
-
-        # Show some interesting statistics
-        if total_references > 0:
-            if not get_tsv_mode():
-                console.print("\n[bold]Reference Statistics:[/bold]")
-
-            # Most referenced users
-            target_counts = {}
-            unresolved_domains = set()
-
-            for ref in ref_index.references:
-                if ref.target_username:
-                    target_counts[ref.target_username] = target_counts.get(ref.target_username, 0) + 1
-                else:
-                    # Track unresolved domains
-                    from urllib.parse import urlparse
-                    domain = urlparse(ref.target_url).netloc.lower()
-                    unresolved_domains.add(domain)
-
-            if target_counts:
-                if get_tsv_mode():
-                    print("Referenced User\tReference Count")
-                    for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
-                        print(f"{username}\t{count}")
-                else:
-                    console.print("\nMost referenced users:")
-                    for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
-                        console.print(f"  {username}: {count} references")
-
-            if unresolved_domains and verbose:
-                if get_tsv_mode():
-                    print("Unresolved Domain\tCount")
-                    for domain in sorted(list(unresolved_domains)[:10]):
-                        print(f"{domain}\t1")
-                    if len(unresolved_domains) > 10:
-                        print(f"... and {len(unresolved_domains) - 10} more\t...")
-                else:
-                    console.print(f"\nUnresolved domains: {len(unresolved_domains)}")
-                    for domain in sorted(list(unresolved_domains)[:10]):
-                        console.print(f"  {domain}")
-                    if len(unresolved_domains) > 10:
-                        console.print(f"  ... and {len(unresolved_domains) - 10} more")
-
-    except Exception as e:
-        console.print(f"[red]Error building reference index: {e}[/red]")
-        if verbose:
-            console.print_exception()
-        raise typer.Exit(1)
-
-
-@app.command()
-def threads(
-    config_file: Optional[Path] = typer.Option(
-        None,
-        "--config",
-        "-c",
-        help="Path to configuration file",
-    ),
-    index_file: Optional[Path] = typer.Option(
-        None,
-        "--index",
-        "-i",
-        help="Path to reference index file (default: references.json in git store)",
-    ),
-    username: Optional[str] = typer.Option(
-        None,
-        "--username",
-        "-u",
-        help="Show threads for specific username only",
-    ),
-    entry_id: Optional[str] = typer.Option(
-        None,
-        "--entry",
-        "-e",
-        help="Show thread for specific entry ID",
-    ),
-    min_size: int = typer.Option(
-        2,
-        "--min-size",
-        "-m",
-        help="Minimum thread size to display",
-    ),
-) -> None:
-    """Show threaded view of related blog entries.
-
-    This command uses the reference index to show which blog entries
-    are connected through cross-references, creating an email-style
-    threaded view of the conversation.
-    """
-    try:
-        # Load configuration
-        config = load_config(config_file)
-
-        # Determine index file path
-        if index_file:
-            index_path = index_file
-        else:
-            index_path = config.git_store / "references.json"
-
-        if not index_path.exists():
-            console.print(f"[red]Reference index not found: {index_path}[/red]")
-            console.print("Run 'thicket index' first to build the reference index")
-            raise typer.Exit(1)
-
-        # Load reference index
-        with open(index_path) as f:
-            index_data = json.load(f)
-
-        ref_index = ReferenceIndex.from_dict(index_data)
-
-        # Initialize Git store to get entry details
-        git_store = GitStore(config.git_store)
-
-        if entry_id and username:
-            # Show specific thread
-            thread_members = ref_index.get_thread_members(username, entry_id)
-            _display_thread(thread_members, ref_index, git_store, f"Thread for {username}:{entry_id}")
-
-        elif username:
-            # Show all threads involving this user
-            user_index = git_store._load_index()
-            user = user_index.get_user(username)
-            if not user:
-                console.print(f"[red]User not found: {username}[/red]")
-                raise typer.Exit(1)
-
-            entries = git_store.list_entries(username)
-            threads_found = set()
-
-            console.print(f"[bold]Threads involving {username}:[/bold]\n")
-
-            for entry in entries:
-                thread_members = ref_index.get_thread_members(username, entry.id)
-                if len(thread_members) >= min_size:
-                    thread_key = tuple(sorted(thread_members))
-                    if thread_key not in threads_found:
-                        threads_found.add(thread_key)
-                        _display_thread(thread_members, ref_index, git_store, f"Thread #{len(threads_found)}")
-
-        else:
-            # Show all threads
-            console.print("[bold]All conversation threads:[/bold]\n")
-
-            all_threads = set()
-            processed_entries = set()
-
-            # Get all entries
-            user_index = git_store._load_index()
-            for username in user_index.users.keys():
-                entries = git_store.list_entries(username)
-                for entry in entries:
-                    entry_key = (username, entry.id)
-                    if entry_key in processed_entries:
-                        continue
-
-                    thread_members = ref_index.get_thread_members(username, entry.id)
-                    if len(thread_members) >= min_size:
-                        thread_key = tuple(sorted(thread_members))
-                        if thread_key not in all_threads:
-                            all_threads.add(thread_key)
-                            _display_thread(thread_members, ref_index, git_store, f"Thread #{len(all_threads)}")
-
-                            # Mark all members as processed
-                            for member in thread_members:
-                                processed_entries.add(member)
-
-            if not all_threads:
-                console.print("[yellow]No conversation threads found[/yellow]")
-                console.print(f"(minimum thread size: {min_size})")
-
-    except Exception as e:
-        console.print(f"[red]Error showing threads: {e}[/red]")
-        raise typer.Exit(1)
-
-
-def _display_thread(thread_members, ref_index, git_store, title):
-    """Display a single conversation thread."""
-    console.print(f"[bold cyan]{title}[/bold cyan]")
-    console.print(f"Thread size: {len(thread_members)} entries")
-
-    # Get entry details for each member
-    thread_entries = []
-    for username, entry_id in thread_members:
-        entry = git_store.get_entry(username, entry_id)
-        if entry:
-            thread_entries.append((username, entry))
-
-    # Sort by publication date
-    thread_entries.sort(key=lambda x: x[1].published or x[1].updated)
-
-    # Display entries
-    for i, (username, entry) in enumerate(thread_entries):
-        prefix = "├─" if i < len(thread_entries) - 1 else "└─"
-
-        # Get references for this entry
-        outbound = ref_index.get_outbound_refs(username, entry.id)
-        inbound = ref_index.get_inbound_refs(username, entry.id)
-
-        ref_info = ""
-        if outbound or inbound:
-            ref_info = f" ({len(outbound)} out, {len(inbound)} in)"
-
-        console.print(f" {prefix} [{username}] {entry.title[:60]}...{ref_info}")
-
-        if entry.published:
-            console.print(f"    Published: {entry.published.strftime('%Y-%m-%d')}")
-
-    console.print()  # Empty line after each thread
+105 -112
src/thicket/cli/commands/info_cmd.py
···
"""CLI command for displaying detailed information about a specific atom entry."""
-
import json
from pathlib import Path
from typing import Optional
···
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
-
from rich.text import Text
from ...core.git_store import GitStore
-
from ...core.reference_parser import ReferenceIndex
from ..main import app
-
from ..utils import load_config, get_tsv_mode
+
from ..utils import get_tsv_mode, load_config
console = Console()
···
@app.command()
def info(
identifier: str = typer.Argument(
-
...,
-
help="The atom ID or URL of the entry to display information about"
+
..., help="The atom ID or URL of the entry to display information about"
),
username: Optional[str] = typer.Option(
None,
"--username",
"-u",
-
help="Username to search for the entry (if not provided, searches all users)"
+
help="Username to search for the entry (if not provided, searches all users)",
),
config_file: Optional[Path] = typer.Option(
Path("thicket.yaml"),
···
help="Path to configuration file",
),
show_content: bool = typer.Option(
-
False,
-
"--content",
-
help="Include the full content of the entry in the output"
+
False, "--content", help="Include the full content of the entry in the output"
),
) -> None:
"""Display detailed information about a specific atom entry.
-
+
You can specify the entry using either its atom ID or URL.
Shows all metadata for the given entry, including title, dates, categories,
and summarizes all inbound and outbound links to/from other posts.
···
try:
# Load configuration
config = load_config(config_file)
-
+
# Initialize Git store
git_store = GitStore(config.git_store)
-
+
# Find the entry
entry = None
found_username = None
-
+
# Check if identifier looks like a URL
-
is_url = identifier.startswith(('http://', 'https://'))
-
+
is_url = identifier.startswith(("http://", "https://"))
+
if username:
# Search specific username
if is_url:
···
if entry:
found_username = user
break
-
+
if not entry or not found_username:
if username:
-
console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found for user '{username}'[/red]")
+
console.print(
+
f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found for user '{username}'[/red]"
+
)
else:
-
console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found in any user's entries[/red]")
+
console.print(
+
f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found in any user's entries[/red]"
+
)
raise typer.Exit(1)
-
-
# Load reference index if available
-
references_path = config.git_store / "references.json"
-
ref_index = None
-
if references_path.exists():
-
with open(references_path) as f:
-
index_data = json.load(f)
-
ref_index = ReferenceIndex.from_dict(index_data)
-
+
# Display information
if get_tsv_mode():
-
_display_entry_info_tsv(entry, found_username, ref_index, show_content)
+
_display_entry_info_tsv(entry, found_username, show_content)
else:
_display_entry_info(entry, found_username)
-
-
if ref_index:
-
_display_link_info(entry, found_username, ref_index)
-
else:
-
console.print("\n[yellow]No reference index found. Run 'thicket index' to build cross-reference data.[/yellow]")
-
+
+
# Display links and backlinks from entry fields
+
_display_link_info(entry, found_username, git_store)
+
# Optionally display content
if show_content and entry.content:
_display_content(entry.content)
-
+
except Exception as e:
console.print(f"[red]Error displaying entry info: {e}[/red]")
raise typer.Exit(1)
···
def _display_entry_info(entry, username: str) -> None:
"""Display basic entry information in a structured format."""
-
+
# Create main info panel
info_table = Table.grid(padding=(0, 2))
info_table.add_column("Field", style="cyan bold", width=15)
info_table.add_column("Value", style="white")
-
+
info_table.add_row("User", f"[green]{username}[/green]")
info_table.add_row("Atom ID", f"[blue]{entry.id}[/blue]")
info_table.add_row("Title", entry.title)
info_table.add_row("Link", str(entry.link))
-
+
if entry.published:
-
info_table.add_row("Published", entry.published.strftime("%Y-%m-%d %H:%M:%S UTC"))
-
+
info_table.add_row(
+
"Published", entry.published.strftime("%Y-%m-%d %H:%M:%S UTC")
+
)
+
info_table.add_row("Updated", entry.updated.strftime("%Y-%m-%d %H:%M:%S UTC"))
-
+
if entry.summary:
# Truncate long summaries
-
summary = entry.summary[:200] + "..." if len(entry.summary) > 200 else entry.summary
+
summary = (
+
entry.summary[:200] + "..." if len(entry.summary) > 200 else entry.summary
+
)
info_table.add_row("Summary", summary)
-
+
if entry.categories:
categories_text = ", ".join(entry.categories)
info_table.add_row("Categories", categories_text)
-
+
if entry.author:
author_info = []
if "name" in entry.author:
···
author_info.append(f"<{entry.author['email']}>")
if author_info:
info_table.add_row("Author", " ".join(author_info))
-
+
if entry.content_type:
info_table.add_row("Content Type", entry.content_type)
-
+
if entry.rights:
info_table.add_row("Rights", entry.rights)
-
+
if entry.source:
info_table.add_row("Source Feed", entry.source)
-
+
panel = Panel(
-
info_table,
-
title=f"[bold]Entry Information[/bold]",
-
border_style="blue"
+
info_table, title="[bold]Entry Information[/bold]", border_style="blue"
)
-
+
console.print(panel)
-
def _display_link_info(entry, username: str, ref_index: ReferenceIndex) -> None:
+
def _display_link_info(entry, username: str, git_store: GitStore) -> None:
"""Display inbound and outbound link information."""
-
-
# Get links
-
outbound_refs = ref_index.get_outbound_refs(username, entry.id)
-
inbound_refs = ref_index.get_inbound_refs(username, entry.id)
-
-
if not outbound_refs and not inbound_refs:
+
+
# Get links from entry fields
+
outbound_links = getattr(entry, "links", [])
+
backlinks = getattr(entry, "backlinks", [])
+
+
if not outbound_links and not backlinks:
console.print("\n[dim]No cross-references found for this entry.[/dim]")
return
-
+
# Create links table
links_table = Table(title="Cross-References")
links_table.add_column("Direction", style="cyan", width=10)
-
links_table.add_column("Target/Source", style="green", width=20)
-
links_table.add_column("URL", style="blue", width=50)
-
-
# Add outbound references
-
for ref in outbound_refs:
-
target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
-
links_table.add_row("→ Out", target_info, ref.target_url)
-
-
# Add inbound references
-
for ref in inbound_refs:
-
source_info = f"{ref.source_username}:{ref.source_entry_id}"
-
links_table.add_row("← In", source_info, ref.target_url)
-
+
links_table.add_column("Target/Source", style="green", width=30)
+
links_table.add_column("URL/ID", style="blue", width=60)
+
+
# Add outbound links
+
for link in outbound_links:
+
links_table.add_row("→ Out", "External/Other", link)
+
+
# Add backlinks (inbound references)
+
for backlink_id in backlinks:
+
# Try to find which user this entry belongs to
+
source_info = backlink_id
+
# Could enhance this by looking up the actual entry to get username
+
links_table.add_row("← In", "Entry", source_info)
+
console.print()
console.print(links_table)
-
+
# Summary
-
console.print(f"\n[bold]Summary:[/bold] {len(outbound_refs)} outbound, {len(inbound_refs)} inbound references")
+
console.print(
+
f"\n[bold]Summary:[/bold] {len(outbound_links)} outbound links, {len(backlinks)} inbound backlinks"
+
)
def _display_content(content: str) -> None:
"""Display the full content of the entry."""
-
+
# Truncate very long content
display_content = content
if len(content) > 5000:
display_content = content[:5000] + "\n\n[... content truncated ...]"
-
+
panel = Panel(
display_content,
title="[bold]Entry Content[/bold]",
border_style="green",
-
expand=False
+
expand=False,
)
-
+
console.print()
console.print(panel)
-
def _display_entry_info_tsv(entry, username: str, ref_index: Optional[ReferenceIndex], show_content: bool) -> None:
+
def _display_entry_info_tsv(entry, username: str, show_content: bool) -> None:
"""Display entry information in TSV format."""
-
+
# Basic info
print("Field\tValue")
print(f"User\t{username}")
print(f"Atom ID\t{entry.id}")
-
print(f"Title\t{entry.title.replace(chr(9), ' ').replace(chr(10), ' ').replace(chr(13), ' ')}")
+
print(
+
f"Title\t{entry.title.replace(chr(9), ' ').replace(chr(10), ' ').replace(chr(13), ' ')}"
+
)
print(f"Link\t{entry.link}")
-
+
if entry.published:
print(f"Published\t{entry.published.strftime('%Y-%m-%d %H:%M:%S UTC')}")
-
+
print(f"Updated\t{entry.updated.strftime('%Y-%m-%d %H:%M:%S UTC')}")
-
+
if entry.summary:
# Escape tabs and newlines in summary
-
summary = entry.summary.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
+
summary = entry.summary.replace("\t", " ").replace("\n", " ").replace("\r", " ")
print(f"Summary\t{summary}")
-
+
if entry.categories:
print(f"Categories\t{', '.join(entry.categories)}")
-
+
if entry.author:
author_info = []
if "name" in entry.author:
···
author_info.append(f"<{entry.author['email']}>")
if author_info:
print(f"Author\t{' '.join(author_info)}")
-
+
if entry.content_type:
print(f"Content Type\t{entry.content_type}")
-
+
if entry.rights:
print(f"Rights\t{entry.rights}")
-
+
if entry.source:
print(f"Source Feed\t{entry.source}")
-
-
# Add reference info if available
-
if ref_index:
-
outbound_refs = ref_index.get_outbound_refs(username, entry.id)
-
inbound_refs = ref_index.get_inbound_refs(username, entry.id)
-
-
print(f"Outbound References\t{len(outbound_refs)}")
-
print(f"Inbound References\t{len(inbound_refs)}")
-
-
# Show each reference
-
for ref in outbound_refs:
-
target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External"
-
print(f"Outbound Reference\t{target_info}\t{ref.target_url}")
-
-
for ref in inbound_refs:
-
source_info = f"{ref.source_username}:{ref.source_entry_id}"
-
print(f"Inbound Reference\t{source_info}\t{ref.target_url}")
-
+
+
# Add links info from entry fields
+
outbound_links = getattr(entry, "links", [])
+
backlinks = getattr(entry, "backlinks", [])
+
+
if outbound_links or backlinks:
+
print(f"Outbound Links\t{len(outbound_links)}")
+
print(f"Backlinks\t{len(backlinks)}")
+
+
# Show each link
+
for link in outbound_links:
+
print(f"→ Link\t{link}")
+
+
for backlink_id in backlinks:
+
print(f"← Backlink\t{backlink_id}")
+
# Show content if requested
if show_content and entry.content:
# Escape tabs and newlines in content
-
content = entry.content.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
-
print(f"Content\t{content}")
+
content = entry.content.replace("\t", " ").replace("\n", " ").replace("\r", " ")
+
print(f"Content\t{content}")
+5 -6
src/thicket/cli/commands/init.py
···
@app.command()
def init(
-   git_store: Path = typer.Argument(..., help="Path to Git repository for storing feeds"),
+   git_store: Path = typer.Argument(
+       ..., help="Path to Git repository for storing feeds"
+   ),
    cache_dir: Optional[Path] = typer.Option(
        None, "--cache-dir", "-c", help="Cache directory (default: ~/.cache/thicket)"
    ),
···
    # Set default paths
    if cache_dir is None:
        from platformdirs import user_cache_dir
+
        cache_dir = Path(user_cache_dir("thicket"))

    if config_file is None:
···
    # Create configuration
    try:
-       config = ThicketConfig(
-           git_store=git_store,
-           cache_dir=cache_dir,
-           users=[]
-       )
+       config = ThicketConfig(git_store=git_store, cache_dir=cache_dir, users=[])
        save_config(config, config_file)
        print_success(f"Created configuration file: {config_file}")
-422
src/thicket/cli/commands/links_cmd.py
···
-"""CLI command for extracting and categorizing all outbound links from blog entries."""
-
-import json
-import re
-from pathlib import Path
-from typing import Dict, List, Optional, Set
-from urllib.parse import urljoin, urlparse
-
-import typer
-from rich.console import Console
-from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
-from rich.table import Table
-
-from ...core.git_store import GitStore
-from ..main import app
-from ..utils import load_config, get_tsv_mode
-
-console = Console()
-
-
-class LinkData:
-    """Represents a link found in a blog entry."""
-
-    def __init__(self, url: str, entry_id: str, username: str):
-        self.url = url
-        self.entry_id = entry_id
-        self.username = username
-
-    def to_dict(self) -> dict:
-        """Convert to dictionary for JSON serialization."""
-        return {
-            "url": self.url,
-            "entry_id": self.entry_id,
-            "username": self.username
-        }
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "LinkData":
-        """Create from dictionary."""
-        return cls(
-            url=data["url"],
-            entry_id=data["entry_id"],
-            username=data["username"]
-        )
-
-
-class LinkCategorizer:
-    """Categorizes links as internal, user, or unknown."""
-
-    def __init__(self, user_domains: Dict[str, Set[str]]):
-        self.user_domains = user_domains
-        # Create reverse mapping of domain -> username
-        self.domain_to_user = {}
-        for username, domains in user_domains.items():
-            for domain in domains:
-                self.domain_to_user[domain] = username
-
-    def categorize_url(self, url: str, source_username: str) -> tuple[str, Optional[str]]:
-        """
-        Categorize a URL as 'internal', 'user', or 'unknown'.
-        Returns (category, target_username).
-        """
-        try:
-            parsed = urlparse(url)
-            domain = parsed.netloc.lower()
-
-            # Check if it's a link to the same user's domain (internal)
-            if domain in self.user_domains.get(source_username, set()):
-                return "internal", source_username
-
-            # Check if it's a link to another user's domain
-            if domain in self.domain_to_user:
-                return "user", self.domain_to_user[domain]
-
-            # Everything else is unknown
-            return "unknown", None
-
-        except Exception:
-            return "unknown", None
-
-
-class LinkExtractor:
-    """Extracts and resolves links from blog entries."""
-
-    def __init__(self):
-        # Pattern for extracting links from HTML
-        self.link_pattern = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL)
-        self.url_pattern = re.compile(r'https?://[^\s<>"]+')
-
-    def extract_links_from_html(self, html_content: str, base_url: str) -> List[tuple[str, str]]:
-        """Extract all links from HTML content and resolve them against base URL."""
-        links = []
-
-        # Extract links from <a> tags
-        for match in self.link_pattern.finditer(html_content):
-            url = match.group(1)
-            text = re.sub(r'<[^>]+>', '', match.group(2)).strip()  # Remove HTML tags from link text
-
-            # Resolve relative URLs against base URL
-            resolved_url = urljoin(base_url, url)
-            links.append((resolved_url, text))
-
-        return links
-
-    def extract_links_from_entry(self, entry, username: str, base_url: str) -> List[LinkData]:
-        """Extract all links from a blog entry."""
-        links = []
-
-        # Combine all text content for analysis
-        content_to_search = []
-        if entry.content:
-            content_to_search.append(entry.content)
-        if entry.summary:
-            content_to_search.append(entry.summary)
-
-        for content in content_to_search:
-            extracted_links = self.extract_links_from_html(content, base_url)
-
-            for url, link_text in extracted_links:
-                # Skip empty URLs
-                if not url or url.startswith('#'):
-                    continue
-
-                link_data = LinkData(
-                    url=url,
-                    entry_id=entry.id,
-                    username=username
-                )
-
-                links.append(link_data)
-
-        return links
-
-
-@app.command()
-def links(
-    config_file: Optional[Path] = typer.Option(
-        Path("thicket.yaml"),
-        "--config",
-        "-c",
-        help="Path to configuration file",
-    ),
-    output_file: Optional[Path] = typer.Option(
-        None,
-        "--output",
-        "-o",
-        help="Path to output links file (default: links.json in git store)",
-    ),
-    mapping_file: Optional[Path] = typer.Option(
-        None,
-        "--mapping",
-        "-m",
-        help="Path to output URL <-> atom ID mapping file (default: url_mapping.json in git store)",
-    ),
-    verbose: bool = typer.Option(
-        False,
-        "--verbose",
-        "-v",
-        help="Show detailed progress information",
-    ),
-) -> None:
-    """Extract and categorize all outbound links from blog entries.
-
-    This command analyzes all blog entries to extract outbound links,
-    resolve them properly with respect to the feed's base URL, and
-    categorize them as internal, user, or unknown links.
-    """
-    try:
-        # Load configuration
-        config = load_config(config_file)
-
-        # Initialize Git store
-        git_store = GitStore(config.git_store)
-
-        # Build user domain mapping
-        if verbose:
-            console.print("Building user domain mapping...")
-
-        index = git_store._load_index()
-        user_domains = {}
-
-        for username, user_metadata in index.users.items():
-            domains = set()
-
-            # Add domains from feeds
-            for feed_url in user_metadata.feeds:
-                domain = urlparse(feed_url).netloc.lower()
-                if domain:
-                    domains.add(domain)
-
-            # Add domain from homepage
-            if user_metadata.homepage:
-                domain = urlparse(str(user_metadata.homepage)).netloc.lower()
-                if domain:
-                    domains.add(domain)
-
-            user_domains[username] = domains
-
-        if verbose:
-            console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains")
-
-        # Initialize components
-        link_extractor = LinkExtractor()
-        categorizer = LinkCategorizer(user_domains)
-
-        # Get all users
-        users = list(index.users.keys())
-
-        if not users:
-            console.print("[yellow]No users found in Git store[/yellow]")
-            raise typer.Exit(0)
-
-        # Process all entries
-        all_links = []
-        link_categories = {"internal": [], "user": [], "unknown": []}
-        link_dict = {}  # Dictionary with link URL as key, maps to list of atom IDs
-        reverse_dict = {}  # Dictionary with atom ID as key, maps to list of URLs
-
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            BarColumn(),
-            TaskProgressColumn(),
-            console=console,
-        ) as progress:
-
-            # Count total entries first
-            counting_task = progress.add_task("Counting entries...", total=len(users))
-            total_entries = 0
-
-            for username in users:
-                entries = git_store.list_entries(username)
-                total_entries += len(entries)
-                progress.advance(counting_task)
-
-            progress.remove_task(counting_task)
-
-            # Process entries
-            processing_task = progress.add_task(
-                f"Processing {total_entries} entries...",
-                total=total_entries
-            )
-
-            for username in users:
-                entries = git_store.list_entries(username)
-                user_metadata = index.users[username]
-
-                # Get base URL for this user (use first feed URL)
-                base_url = str(user_metadata.feeds[0]) if user_metadata.feeds else "https://example.com"
-
-                for entry in entries:
-                    # Extract links from this entry
-                    entry_links = link_extractor.extract_links_from_entry(entry, username, base_url)
-
-                    # Track unique links per entry
-                    entry_urls_seen = set()
-
-                    # Categorize each link
-                    for link_data in entry_links:
-                        # Skip if we've already seen this URL in this entry
-                        if link_data.url in entry_urls_seen:
-                            continue
-                        entry_urls_seen.add(link_data.url)
-
-                        category, target_username = categorizer.categorize_url(link_data.url, username)
-
-                        # Add to link dictionary (URL as key, maps to list of atom IDs)
-                        if link_data.url not in link_dict:
-                            link_dict[link_data.url] = []
-                        if link_data.entry_id not in link_dict[link_data.url]:
-                            link_dict[link_data.url].append(link_data.entry_id)
-
-                        # Also add to reverse mapping (atom ID -> list of URLs)
-                        if link_data.entry_id not in reverse_dict:
-                            reverse_dict[link_data.entry_id] = []
-                        if link_data.url not in reverse_dict[link_data.entry_id]:
-                            reverse_dict[link_data.entry_id].append(link_data.url)
-
-                        # Add category info to link data for categories tracking
-                        link_info = link_data.to_dict()
-                        link_info["category"] = category
-                        link_info["target_username"] = target_username
-
-                        all_links.append(link_info)
-                        link_categories[category].append(link_info)
-
-                    progress.advance(processing_task)
-
-                    if verbose and entry_links:
-                        console.print(f"  Found {len(entry_links)} links in {username}:{entry.title[:50]}...")
-
-        # Determine output paths
-        if output_file:
-            output_path = output_file
-        else:
-            output_path = config.git_store / "links.json"
-
-        if mapping_file:
-            mapping_path = mapping_file
-        else:
-            mapping_path = config.git_store / "url_mapping.json"
-
-        # Save all extracted links (not just filtered ones)
-        if verbose:
-            console.print("Preparing output data...")
-
-        # Build a set of all URLs that correspond to posts in the git database
-        registered_urls = set()
-
-        # Get all entries from all users and build URL mappings
-        for username in users:
-            entries = git_store.list_entries(username)
-            user_metadata = index.users[username]
-
-            for entry in entries:
-                # Try to match entry URLs with extracted links
-                if hasattr(entry, 'link') and entry.link:
-                    registered_urls.add(str(entry.link))
-
-                # Also check entry alternate links if they exist
-                if hasattr(entry, 'links') and entry.links:
-                    for link in entry.links:
-                        if hasattr(link, 'href') and link.href:
-                            registered_urls.add(str(link.href))
-
-        # Create filtered version for URL mapping (only links to registered posts)
-        filtered_link_dict = {}
-        filtered_reverse_dict = {}
-
-        for url, entry_ids in link_dict.items():
-            if url in registered_urls:
-                filtered_link_dict[url] = entry_ids
-
-                # Also update reverse mapping
-                for entry_id in entry_ids:
-                    if entry_id not in filtered_reverse_dict:
-                        filtered_reverse_dict[entry_id] = []
-                    if url not in filtered_reverse_dict[entry_id]:
-                        filtered_reverse_dict[entry_id].append(url)
-
-        # Use all links for main output, not filtered ones
-        output_data = link_dict
-
-        if verbose:
-            console.print(f"Found {len(registered_urls)} registered post URLs")
-            console.print(f"Found {len(link_dict)} total links, {len(filtered_link_dict)} links to registered posts")
-
-        # Save links data (URL -> atom ID mapping, all links)
-        with open(output_path, "w") as f:
-            json.dump(output_data, f, indent=2, default=str)
-
-        # Save bidirectional mapping file (filtered)
-        mapping_data = {
-            "url_to_atom": filtered_link_dict,
-            "atom_to_urls": filtered_reverse_dict
-        }
-
-        with open(mapping_path, "w") as f:
-            json.dump(mapping_data, f, indent=2, default=str)
-
-        # Show summary
-        if not get_tsv_mode():
-            console.print("\n[green]✓ Links extraction completed successfully[/green]")
-
-        # Create summary table or TSV output
-        if get_tsv_mode():
-            print("Category\tCount\tDescription")
-            print(f"Internal\t{len(link_categories['internal'])}\tLinks to same user's domain")
-            print(f"User\t{len(link_categories['user'])}\tLinks to other tracked users")
-            print(f"Unknown\t{len(link_categories['unknown'])}\tLinks to external sites")
-            print(f"Total Extracted\t{len(all_links)}\tAll extracted links")
-            print(f"Saved to Output\t{len(output_data)}\tLinks saved to output file")
-            print(f"Cross-references\t{len(filtered_link_dict)}\tLinks to registered posts only")
-        else:
-            table = Table(title="Links Summary")
-            table.add_column("Category", style="cyan")
-            table.add_column("Count", style="green")
-            table.add_column("Description", style="white")
-
-            table.add_row("Internal", str(len(link_categories["internal"])), "Links to same user's domain")
-            table.add_row("User", str(len(link_categories["user"])), "Links to other tracked users")
-            table.add_row("Unknown", str(len(link_categories["unknown"])), "Links to external sites")
-            table.add_row("Total Extracted", str(len(all_links)), "All extracted links")
-            table.add_row("Saved to Output", str(len(output_data)), "Links saved to output file")
-            table.add_row("Cross-references", str(len(filtered_link_dict)), "Links to registered posts only")
-
-            console.print(table)
-
-        # Show user links if verbose
-        if verbose and link_categories["user"]:
-            if get_tsv_mode():
-                print("User Link Source\tUser Link Target\tLink Count")
-                user_link_counts = {}
-
-                for link in link_categories["user"]:
-                    key = f"{link['username']} -> {link['target_username']}"
-                    user_link_counts[key] = user_link_counts.get(key, 0) + 1
-
-                for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
-                    source, target = link_pair.split(" -> ")
-                    print(f"{source}\t{target}\t{count}")
-            else:
-                console.print("\n[bold]User-to-user links:[/bold]")
-                user_link_counts = {}
-
-                for link in link_categories["user"]:
-                    key = f"{link['username']} -> {link['target_username']}"
-                    user_link_counts[key] = user_link_counts.get(key, 0) + 1
-
-                for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
-                    console.print(f"  {link_pair}: {count} links")
-
-        if not get_tsv_mode():
-            console.print(f"\nLinks output saved to: {output_path}")
-            console.print(f"URL mapping saved to: {mapping_path}")
-
-    except Exception as e:
-        console.print(f"[red]Error extracting links: {e}[/red]")
-        if verbose:
-            console.print_exception()
-        raise typer.Exit(1)
+11 -11
src/thicket/cli/commands/list_cmd.py
···
from ..main import app
from ..utils import (
    console,
+   get_tsv_mode,
    load_config,
+   print_entries_tsv,
    print_error,
-   print_feeds_table,
    print_feeds_table_from_git,
    print_info,
-   print_users_table,
    print_users_table_from_git,
-   print_entries_tsv,
-   get_tsv_mode,
)
···
    """List all users."""
    index = git_store._load_index()
    users = list(index.users.values())

    if not users:
        print_info("No users configured")
        return
···
    print_feeds_table_from_git(git_store, username)

-def list_entries(git_store: GitStore, username: Optional[str] = None, limit: Optional[int] = None) -> None:
+def list_entries(
+    git_store: GitStore, username: Optional[str] = None, limit: Optional[int] = None
+) -> None:
    """List entries, optionally filtered by user."""
    if username:
···
    """Clean HTML content for display in table."""
    if not content:
        return ""

    # Remove HTML tags
-   clean_text = re.sub(r'<[^>]+>', ' ', content)
+   clean_text = re.sub(r"<[^>]+>", " ", content)
    # Replace multiple whitespace with single space
-   clean_text = re.sub(r'\s+', ' ', clean_text)
+   clean_text = re.sub(r"\s+", " ", clean_text)
    # Strip and limit length
    clean_text = clean_text.strip()
    if len(clean_text) > 100:
        clean_text = clean_text[:97] + "..."

    return clean_text
···
    if get_tsv_mode():
        print_entries_tsv(entries_by_user, usernames)
        return

    table = Table(title="Feed Entries")
    table.add_column("User", style="cyan", no_wrap=True)
    table.add_column("Title", style="bold")
+15 -5
src/thicket/cli/commands/sync.py
···
    user_updated_entries = 0

    # Sync each feed for the user
-   for feed_url in track(user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"):
+   for feed_url in track(
+       user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"
+   ):
        try:
            new_entries, updated_entries = asyncio.run(
                sync_feed(git_store, user_metadata.username, feed_url, dry_run)
···
            print_error(f"Failed to sync feed {feed_url}: {e}")
            continue

-   print_info(f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated")
+   print_info(
+       f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated"
+   )

    total_new_entries += user_new_entries
    total_updated_entries += user_updated_entries
···
    # Summary
    if dry_run:
-       print_info(f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated")
+       print_info(
+           f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated"
+       )
    else:
-       print_success(f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated")
+       print_success(
+           f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated"
+       )

-async def sync_feed(git_store: GitStore, username: str, feed_url, dry_run: bool) -> tuple[int, int]:
+async def sync_feed(
+    git_store: GitStore, username: str, feed_url, dry_run: bool
+) -> tuple[int, int]:
    """Sync a single feed for a user."""
    parser = FeedParser()
+1 -1
src/thicket/cli/main.py
···
# Import commands to register them
-from .commands import add, duplicates, index_cmd, info_cmd, init, links_cmd, list_cmd, sync
+from .commands import add, duplicates, info_cmd, init, list_cmd, sync  # noqa: F401

if __name__ == "__main__":
    app()
+32 -20
src/thicket/cli/utils.py
···
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.table import Table

-from ..models import ThicketConfig, UserMetadata
from ..core.git_store import GitStore
+from ..models import ThicketConfig, UserMetadata

console = Console()
···
def get_tsv_mode() -> bool:
    """Get the global TSV mode setting."""
    from .main import tsv_mode
+
    return tsv_mode
···
        default_config = Path("thicket.yaml")
        if default_config.exists():
            import yaml
+
            with open(default_config) as f:
                config_data = yaml.safe_load(f)
            return ThicketConfig(**config_data)

        # Fall back to environment variables
        return ThicketConfig()
    except Exception as e:
        console.print(f"[red]Error loading configuration: {e}[/red]")
-       console.print("[yellow]Run 'thicket init' to create a new configuration.[/yellow]")
+       console.print(
+           "[yellow]Run 'thicket init' to create a new configuration.[/yellow]"
+       )
        raise typer.Exit(1) from e
···
    if get_tsv_mode():
        print_users_tsv(config)
        return

    table = Table(title="Users and Feeds")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Display Name", style="magenta")
···
    if get_tsv_mode():
        print_feeds_tsv(config, username)
        return

    table = Table(title=f"Feeds{f' for {username}' if username else ''}")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Feed URL", style="blue")
···
    if get_tsv_mode():
        print_users_tsv_from_git(users)
        return

    table = Table(title="Users and Feeds")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Display Name", style="magenta")
···
    console.print(table)

-def print_feeds_table_from_git(git_store: GitStore, username: Optional[str] = None) -> None:
+def print_feeds_table_from_git(
+    git_store: GitStore, username: Optional[str] = None
+) -> None:
    """Print a table of feeds from git repository."""
    if get_tsv_mode():
        print_feeds_tsv_from_git(git_store, username)
        return

    table = Table(title=f"Feeds{f' for {username}' if username else ''}")
    table.add_column("Username", style="cyan", no_wrap=True)
    table.add_column("Feed URL", style="blue")
···
    print("Username\tDisplay Name\tEmail\tHomepage\tFeeds")
    for user in config.users:
        feeds_str = ",".join(str(feed) for feed in user.feeds)
-       print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}")
+       print(
+           f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}"
+       )

def print_users_tsv_from_git(users: list[UserMetadata]) -> None:
···
    print("Username\tDisplay Name\tEmail\tHomepage\tFeeds")
    for user in users:
        feeds_str = ",".join(user.feeds)
-       print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}")
+       print(
+           f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}"
+       )

def print_feeds_tsv(config: ThicketConfig, username: Optional[str] = None) -> None:
···
    print("Username\tFeed URL\tStatus")
    users = [config.find_user(username)] if username else config.users
    users = [u for u in users if u is not None]

    for user in users:
        for feed in user.feeds:
            print(f"{user.username}\t{feed}\tActive")

-def print_feeds_tsv_from_git(git_store: GitStore, username: Optional[str] = None) -> None:
+def print_feeds_tsv_from_git(
+    git_store: GitStore, username: Optional[str] = None
+) -> None:
    """Print feeds from git repository in TSV format."""
    print("Username\tFeed URL\tStatus")

    if username:
        user = git_store.get_user(username)
        users = [user] if user else []
    else:
        index = git_store._load_index()
        users = list(index.users.values())

    for user in users:
        for feed in user.feeds:
            print(f"{user.username}\t{feed}\tActive")
···
def print_entries_tsv(entries_by_user: list[list], usernames: list[str]) -> None:
    """Print entries in TSV format."""
    print("User\tAtom ID\tTitle\tUpdated\tURL")

    # Combine all entries with usernames
    all_entries = []
    for entries, username in zip(entries_by_user, usernames):
        for entry in entries:
            all_entries.append((username, entry))

    # Sort by updated time (newest first)
    all_entries.sort(key=lambda x: x[1].updated, reverse=True)

    for username, entry in all_entries:
        # Format updated time
        updated_str = entry.updated.strftime("%Y-%m-%d %H:%M")

        # Escape tabs and newlines in title to preserve TSV format
-       title = entry.title.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
+       title = entry.title.replace("\t", " ").replace("\n", " ").replace("\r", " ")

        print(f"{username}\t{entry.id}\t{title}\t{updated_str}\t{entry.link}")
+84 -55
src/thicket/core/feed_parser.py
···
"""Initialize the feed parser."""
self.user_agent = user_agent
self.allowed_tags = [
-
"a", "abbr", "acronym", "b", "blockquote", "br", "code", "em",
-
"i", "li", "ol", "p", "pre", "strong", "ul", "h1", "h2", "h3",
-
"h4", "h5", "h6", "img", "div", "span",
+
"a",
+
"abbr",
+
"acronym",
+
"b",
+
"blockquote",
+
"br",
+
"code",
+
"em",
+
"i",
+
"li",
+
"ol",
+
"p",
+
"pre",
+
"strong",
+
"ul",
+
"h1",
+
"h2",
+
"h3",
+
"h4",
+
"h5",
+
"h6",
+
"img",
+
"div",
+
"span",
]
self.allowed_attributes = {
"a": ["href", "title"],
···
response.raise_for_status()
return response.text
-
def parse_feed(self, content: str, source_url: Optional[HttpUrl] = None) -> tuple[FeedMetadata, list[AtomEntry]]:
+
def parse_feed(
+
self, content: str, source_url: Optional[HttpUrl] = None
+
) -> tuple[FeedMetadata, list[AtomEntry]]:
"""Parse feed content and return metadata and entries."""
parsed = feedparser.parse(content)
···
author_email = None
author_uri = None
-
if hasattr(feed, 'author_detail'):
-
author_name = feed.author_detail.get('name')
-
author_email = feed.author_detail.get('email')
-
author_uri = feed.author_detail.get('href')
-
elif hasattr(feed, 'author'):
+
if hasattr(feed, "author_detail"):
+
author_name = feed.author_detail.get("name")
+
author_email = feed.author_detail.get("email")
+
author_uri = feed.author_detail.get("href")
+
elif hasattr(feed, "author"):
author_name = feed.author
# Parse managing editor for RSS feeds
-
if not author_email and hasattr(feed, 'managingEditor'):
+
if not author_email and hasattr(feed, "managingEditor"):
author_email = feed.managingEditor
# Parse feed link
feed_link = None
-
if hasattr(feed, 'link'):
+
if hasattr(feed, "link"):
try:
feed_link = HttpUrl(feed.link)
except ValidationError:
···
icon = None
image_url = None
-
if hasattr(feed, 'image'):
+
if hasattr(feed, "image"):
try:
-
image_url = HttpUrl(feed.image.get('href', feed.image.get('url', '')))
+
image_url = HttpUrl(feed.image.get("href", feed.image.get("url", "")))
except (ValidationError, AttributeError):
pass
-
if hasattr(feed, 'icon'):
+
if hasattr(feed, "icon"):
try:
icon = HttpUrl(feed.icon)
except ValidationError:
pass
-
if hasattr(feed, 'logo'):
+
if hasattr(feed, "logo"):
try:
logo = HttpUrl(feed.logo)
except ValidationError:
pass
return FeedMetadata(
-
title=getattr(feed, 'title', None),
+
title=getattr(feed, "title", None),
author_name=author_name,
author_email=author_email,
author_uri=HttpUrl(author_uri) if author_uri else None,
···
logo=logo,
icon=icon,
image_url=image_url,
-
description=getattr(feed, 'description', None),
+
description=getattr(feed, "description", None),
)
-
def _normalize_entry(self, entry: feedparser.FeedParserDict, source_url: Optional[HttpUrl] = None) -> AtomEntry:
+
def _normalize_entry(
+
self, entry: feedparser.FeedParserDict, source_url: Optional[HttpUrl] = None
+
) -> AtomEntry:
"""Normalize an entry to Atom format."""
# Parse timestamps
-
updated = self._parse_timestamp(entry.get('updated_parsed') or entry.get('published_parsed'))
-
published = self._parse_timestamp(entry.get('published_parsed'))
+
updated = self._parse_timestamp(
+
entry.get("updated_parsed") or entry.get("published_parsed")
+
)
+
published = self._parse_timestamp(entry.get("published_parsed"))
# Parse content
content = self._extract_content(entry)
···
# Parse categories/tags
categories = []
-
if hasattr(entry, 'tags'):
-
categories = [tag.get('term', '') for tag in entry.tags if tag.get('term')]
+
if hasattr(entry, "tags"):
+
categories = [tag.get("term", "") for tag in entry.tags if tag.get("term")]
# Sanitize HTML content
if content:
content = self._sanitize_html(content)
-
summary = entry.get('summary', '')
+
summary = entry.get("summary", "")
if summary:
summary = self._sanitize_html(summary)
return AtomEntry(
-
id=entry.get('id', entry.get('link', '')),
-
title=entry.get('title', ''),
-
link=HttpUrl(entry.get('link', '')),
+
id=entry.get("id", entry.get("link", "")),
+
title=entry.get("title", ""),
+
link=HttpUrl(entry.get("link", "")),
updated=updated,
published=published,
summary=summary or None,
···
content_type=content_type,
author=author,
categories=categories,
-
rights=entry.get('rights', None),
+
rights=entry.get("rights", None),
source=str(source_url) if source_url else None,
)
···
def _extract_content(self, entry: feedparser.FeedParserDict) -> Optional[str]:
"""Extract the best content from an entry."""
# Prefer content over summary
-
if hasattr(entry, 'content') and entry.content:
+
if hasattr(entry, "content") and entry.content:
# Find the best content (prefer text/html, then text/plain)
for content_item in entry.content:
-
if content_item.get('type') in ['text/html', 'html']:
-
return content_item.get('value', '')
-
elif content_item.get('type') in ['text/plain', 'text']:
-
return content_item.get('value', '')
+
if content_item.get("type") in ["text/html", "html"]:
+
return content_item.get("value", "")
+
elif content_item.get("type") in ["text/plain", "text"]:
+
return content_item.get("value", "")
# Fallback to first content item
-
return entry.content[0].get('value', '')
+
return entry.content[0].get("value", "")
# Fallback to summary
-
return entry.get('summary', '')
+
return entry.get("summary", "")
def _extract_content_type(self, entry: feedparser.FeedParserDict) -> str:
"""Extract content type from entry."""
-
if hasattr(entry, 'content') and entry.content:
-
content_type = entry.content[0].get('type', 'html')
+
if hasattr(entry, "content") and entry.content:
+
content_type = entry.content[0].get("type", "html")
# Normalize content type
-
if content_type in ['text/html', 'html']:
-
return 'html'
-
elif content_type in ['text/plain', 'text']:
-
return 'text'
-
elif content_type == 'xhtml':
-
return 'xhtml'
-
return 'html'
+
if content_type in ["text/html", "html"]:
+
return "html"
+
elif content_type in ["text/plain", "text"]:
+
return "text"
+
elif content_type == "xhtml":
+
return "xhtml"
+
return "html"
def _extract_author(self, entry: feedparser.FeedParserDict) -> Optional[dict]:
"""Extract author information from entry."""
author = {}
-
if hasattr(entry, 'author_detail'):
-
author.update({
-
'name': entry.author_detail.get('name'),
-
'email': entry.author_detail.get('email'),
-
'uri': entry.author_detail.get('href'),
-
})
-
elif hasattr(entry, 'author'):
-
author['name'] = entry.author
+
if hasattr(entry, "author_detail"):
+
author.update(
+
{
+
"name": entry.author_detail.get("name"),
+
"email": entry.author_detail.get("email"),
+
"uri": entry.author_detail.get("href"),
+
}
+
)
+
elif hasattr(entry, "author"):
+
author["name"] = entry.author
return author if author else None
···
# Start with the path component
if parsed.path:
# Remove leading slash and replace problematic characters
-
safe_id = parsed.path.lstrip('/').replace('/', '_').replace('\\', '_')
+
safe_id = parsed.path.lstrip("/").replace("/", "_").replace("\\", "_")
else:
# Use the entire ID as fallback
safe_id = entry_id
···
# Replace problematic characters
safe_chars = []
for char in safe_id:
-
if char.isalnum() or char in '-_.':
+
if char.isalnum() or char in "-_.":
safe_chars.append(char)
else:
-
safe_chars.append('_')
+
safe_chars.append("_")
-
safe_id = ''.join(safe_chars)
+
safe_id = "".join(safe_chars)
# Ensure it's not too long (max 200 chars)
if len(safe_id) > 200:
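For reference, the sanitization above maps an Atom entry ID to a filesystem-safe filename: take the URL path (or the raw ID when there is no path), replace slashes and anything outside alphanumerics and `-_.` with underscores, then cap the length. A minimal standalone sketch of the same logic — the behavior past 200 characters is an assumption, since the hunk cuts off there:

```python
from urllib.parse import urlparse

def sanitize_entry_id(entry_id: str) -> str:
    """Mirror of FeedParser.sanitize_entry_id: entry ID -> safe filename stem."""
    parsed = urlparse(entry_id)
    # Prefer the URL path; fall back to the raw ID when there is no path
    if parsed.path:
        safe_id = parsed.path.lstrip("/").replace("/", "_").replace("\\", "_")
    else:
        safe_id = entry_id
    # Keep alphanumerics and "-_."; everything else becomes an underscore
    safe_id = "".join(c if c.isalnum() or c in "-_." else "_" for c in safe_id)
    return safe_id[:200]  # assumed: plain truncation at 200 chars

# sanitize_entry_id("https://blog.user.com/2024/06/hello-world?x=1")
# -> "2024_06_hello-world"   (the query string is dropped with the rest of the URL)
```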
+45 -18
src/thicket/core/git_store.py
···
"""Save the index to index.json."""
index_path = self.repo_path / "index.json"
with open(index_path, "w") as f:
-
json.dump(index.model_dump(mode="json", exclude_none=True), f, indent=2, default=str)
+
json.dump(
+
index.model_dump(mode="json", exclude_none=True),
+
f,
+
indent=2,
+
default=str,
+
)
def _load_index(self) -> GitStoreIndex:
"""Load the index from index.json."""
···
return DuplicateMap(**data)
-
def add_user(self, username: str, display_name: Optional[str] = None,
-
email: Optional[str] = None, homepage: Optional[str] = None,
-
icon: Optional[str] = None, feeds: Optional[list[str]] = None) -> UserMetadata:
+
def add_user(
+
self,
+
username: str,
+
display_name: Optional[str] = None,
+
email: Optional[str] = None,
+
homepage: Optional[str] = None,
+
icon: Optional[str] = None,
+
feeds: Optional[list[str]] = None,
+
) -> UserMetadata:
"""Add a new user to the Git store."""
index = self._load_index()
···
created=datetime.now(),
last_updated=datetime.now(),
)
-
# Update index
index.add_user(user_metadata)
···
user.update_timestamp()
-
# Update index
index.add_user(user)
self._save_index(index)
···
# Sanitize entry ID for filename
from .feed_parser import FeedParser
+
parser = FeedParser()
safe_id = parser.sanitize_entry_id(entry.id)
···
# Save entry
with open(entry_path, "w") as f:
-
json.dump(entry.model_dump(mode="json", exclude_none=True), f, indent=2, default=str)
+
json.dump(
+
entry.model_dump(mode="json", exclude_none=True),
+
f,
+
indent=2,
+
default=str,
+
)
# Update user metadata if new entry
if not entry_exists:
···
# Sanitize entry ID
from .feed_parser import FeedParser
+
parser = FeedParser()
safe_id = parser.sanitize_entry_id(entry_id)
···
return AtomEntry(**data)
-
def list_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]:
+
def list_entries(
+
self, username: str, limit: Optional[int] = None
+
) -> list[AtomEntry]:
"""List entries for a user."""
user = self.get_user(username)
if not user:
···
return []
entries = []
-
entry_files = sorted(user_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
-
+
entry_files = sorted(
+
user_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True
+
)
if limit:
entry_files = entry_files[:limit]
···
"total_entries": index.total_entries,
"total_duplicates": len(duplicates.duplicates),
"last_updated": index.last_updated,
-
"repository_size": sum(f.stat().st_size for f in self.repo_path.rglob("*") if f.is_file()),
+
"repository_size": sum(
+
f.stat().st_size for f in self.repo_path.rglob("*") if f.is_file()
+
),
}
-
def search_entries(self, query: str, username: Optional[str] = None,
-
limit: Optional[int] = None) -> list[tuple[str, AtomEntry]]:
+
def search_entries(
+
self, query: str, username: Optional[str] = None, limit: Optional[int] = None
+
) -> list[tuple[str, AtomEntry]]:
"""Search entries by content."""
results = []
···
entry = AtomEntry(**data)
# Simple text search in title, summary, and content
-
searchable_text = " ".join(filter(None, [
-
entry.title,
-
entry.summary or "",
-
entry.content or "",
-
])).lower()
+
searchable_text = " ".join(
+
filter(
+
None,
+
[
+
entry.title,
+
entry.summary or "",
+
entry.content or "",
+
],
+
)
+
).lower()
if query.lower() in searchable_text:
results.append((user.username, entry))
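The search here is a plain case-insensitive substring match over title, summary, and content — no ranking or tokenization. A usage sketch (the `GitStore` constructor signature and import path are assumptions; the `search_entries` signature follows the diff above):

```python
from pathlib import Path

from thicket.core.git_store import GitStore  # assumed import path

store = GitStore(Path("./git-store"))  # assumed: takes the repo root path
for username, entry in store.search_entries("atom", limit=10):
    print(f"{username}: {entry.title}")
```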
-301
src/thicket/core/reference_parser.py
···
-
"""Reference detection and parsing for blog entries."""
-
-
import re
-
from typing import Optional
-
from urllib.parse import urlparse
-
-
from ..models import AtomEntry
-
-
-
class BlogReference:
-
"""Represents a reference from one blog entry to another."""
-
-
def __init__(
-
self,
-
source_entry_id: str,
-
source_username: str,
-
target_url: str,
-
target_username: Optional[str] = None,
-
target_entry_id: Optional[str] = None,
-
):
-
self.source_entry_id = source_entry_id
-
self.source_username = source_username
-
self.target_url = target_url
-
self.target_username = target_username
-
self.target_entry_id = target_entry_id
-
-
def to_dict(self) -> dict:
-
"""Convert to dictionary for JSON serialization."""
-
result = {
-
"source_entry_id": self.source_entry_id,
-
"source_username": self.source_username,
-
"target_url": self.target_url,
-
}
-
-
# Only include optional fields if they are not None
-
if self.target_username is not None:
-
result["target_username"] = self.target_username
-
if self.target_entry_id is not None:
-
result["target_entry_id"] = self.target_entry_id
-
-
return result
-
-
@classmethod
-
def from_dict(cls, data: dict) -> "BlogReference":
-
"""Create from dictionary."""
-
return cls(
-
source_entry_id=data["source_entry_id"],
-
source_username=data["source_username"],
-
target_url=data["target_url"],
-
target_username=data.get("target_username"),
-
target_entry_id=data.get("target_entry_id"),
-
)
-
-
-
class ReferenceIndex:
-
"""Index of blog-to-blog references for creating threaded views."""
-
-
def __init__(self):
-
self.references: list[BlogReference] = []
-
self.outbound_refs: dict[
-
str, list[BlogReference]
-
] = {} # entry_id -> outbound refs
-
self.inbound_refs: dict[
-
str, list[BlogReference]
-
] = {} # entry_id -> inbound refs
-
self.user_domains: dict[str, set[str]] = {} # username -> set of domains
-
-
def add_reference(self, ref: BlogReference) -> None:
-
"""Add a reference to the index."""
-
self.references.append(ref)
-
-
# Update outbound references
-
source_key = f"{ref.source_username}:{ref.source_entry_id}"
-
if source_key not in self.outbound_refs:
-
self.outbound_refs[source_key] = []
-
self.outbound_refs[source_key].append(ref)
-
-
# Update inbound references if we can identify the target
-
if ref.target_username and ref.target_entry_id:
-
target_key = f"{ref.target_username}:{ref.target_entry_id}"
-
if target_key not in self.inbound_refs:
-
self.inbound_refs[target_key] = []
-
self.inbound_refs[target_key].append(ref)
-
-
def get_outbound_refs(self, username: str, entry_id: str) -> list[BlogReference]:
-
"""Get all outbound references from an entry."""
-
key = f"{username}:{entry_id}"
-
return self.outbound_refs.get(key, [])
-
-
def get_inbound_refs(self, username: str, entry_id: str) -> list[BlogReference]:
-
"""Get all inbound references to an entry."""
-
key = f"{username}:{entry_id}"
-
return self.inbound_refs.get(key, [])
-
-
def get_thread_members(self, username: str, entry_id: str) -> set[tuple[str, str]]:
-
"""Get all entries that are part of the same thread."""
-
visited = set()
-
to_visit = [(username, entry_id)]
-
thread_members = set()
-
-
while to_visit:
-
current_user, current_entry = to_visit.pop()
-
if (current_user, current_entry) in visited:
-
continue
-
-
visited.add((current_user, current_entry))
-
thread_members.add((current_user, current_entry))
-
-
# Add outbound references
-
for ref in self.get_outbound_refs(current_user, current_entry):
-
if ref.target_username and ref.target_entry_id:
-
to_visit.append((ref.target_username, ref.target_entry_id))
-
-
# Add inbound references
-
for ref in self.get_inbound_refs(current_user, current_entry):
-
to_visit.append((ref.source_username, ref.source_entry_id))
-
-
return thread_members
-
-
def to_dict(self) -> dict:
-
"""Convert to dictionary for JSON serialization."""
-
return {
-
"references": [ref.to_dict() for ref in self.references],
-
"user_domains": {k: list(v) for k, v in self.user_domains.items()},
-
}
-
-
@classmethod
-
def from_dict(cls, data: dict) -> "ReferenceIndex":
-
"""Create from dictionary."""
-
index = cls()
-
for ref_data in data.get("references", []):
-
ref = BlogReference.from_dict(ref_data)
-
index.add_reference(ref)
-
-
for username, domains in data.get("user_domains", {}).items():
-
index.user_domains[username] = set(domains)
-
-
return index
-
-
-
class ReferenceParser:
-
"""Parses blog entries to detect references to other blogs."""
-
-
def __init__(self):
-
# Common blog platforms and patterns
-
self.blog_patterns = [
-
r"https?://[^/]+\.(?:org|com|net|io|dev|me|co\.uk)/.*", # Common blog domains
-
r"https?://[^/]+\.github\.io/.*", # GitHub Pages
-
r"https?://[^/]+\.substack\.com/.*", # Substack
-
r"https?://medium\.com/.*", # Medium
-
r"https?://[^/]+\.wordpress\.com/.*", # WordPress.com
-
r"https?://[^/]+\.blogspot\.com/.*", # Blogger
-
]
-
-
# Compile regex patterns
-
self.link_pattern = re.compile(
-
r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL
-
)
-
self.url_pattern = re.compile(r'https?://[^\s<>"]+')
-
-
def extract_links_from_html(self, html_content: str) -> list[tuple[str, str]]:
-
"""Extract all links from HTML content."""
-
links = []
-
-
# Extract links from <a> tags
-
for match in self.link_pattern.finditer(html_content):
-
url = match.group(1)
-
text = re.sub(
-
r"<[^>]+>", "", match.group(2)
-
).strip() # Remove HTML tags from link text
-
links.append((url, text))
-
-
return links
-
-
def is_blog_url(self, url: str) -> bool:
-
"""Check if a URL likely points to a blog post."""
-
for pattern in self.blog_patterns:
-
if re.match(pattern, url):
-
return True
-
return False
-
-
def resolve_target_user(
-
self, url: str, user_domains: dict[str, set[str]]
-
) -> Optional[str]:
-
"""Try to resolve a URL to a known user based on domain mapping."""
-
parsed_url = urlparse(url)
-
domain = parsed_url.netloc.lower()
-
-
for username, domains in user_domains.items():
-
if domain in domains:
-
return username
-
-
return None
-
-
def extract_references(
-
self, entry: AtomEntry, username: str, user_domains: dict[str, set[str]]
-
) -> list[BlogReference]:
-
"""Extract all blog references from an entry."""
-
references = []
-
-
# Combine all text content for analysis
-
content_to_search = []
-
if entry.content:
-
content_to_search.append(entry.content)
-
if entry.summary:
-
content_to_search.append(entry.summary)
-
-
for content in content_to_search:
-
links = self.extract_links_from_html(content)
-
-
for url, _link_text in links:
-
# Skip internal links (same domain as the entry)
-
entry_domain = (
-
urlparse(str(entry.link)).netloc.lower() if entry.link else ""
-
)
-
link_domain = urlparse(url).netloc.lower()
-
-
if link_domain == entry_domain:
-
continue
-
-
# Check if this looks like a blog URL
-
if not self.is_blog_url(url):
-
continue
-
-
# Try to resolve to a known user
-
target_username = self.resolve_target_user(url, user_domains)
-
-
ref = BlogReference(
-
source_entry_id=entry.id,
-
source_username=username,
-
target_url=url,
-
target_username=target_username,
-
target_entry_id=None, # Will be resolved later if possible
-
)
-
-
references.append(ref)
-
-
return references
-
-
def build_user_domain_mapping(self, git_store: "GitStore") -> dict[str, set[str]]:
-
"""Build mapping of usernames to their known domains."""
-
user_domains = {}
-
index = git_store._load_index()
-
-
for username, user_metadata in index.users.items():
-
domains = set()
-
-
# Add domains from feeds
-
for feed_url in user_metadata.feeds:
-
domain = urlparse(feed_url).netloc.lower()
-
if domain:
-
domains.add(domain)
-
-
# Add domain from homepage
-
if user_metadata.homepage:
-
domain = urlparse(str(user_metadata.homepage)).netloc.lower()
-
if domain:
-
domains.add(domain)
-
-
user_domains[username] = domains
-
-
return user_domains
-
-
def resolve_target_entry_ids(
-
self, references: list[BlogReference], git_store: "GitStore"
-
) -> list[BlogReference]:
-
"""Resolve target_entry_id for references that have target_username but no target_entry_id."""
-
resolved_refs = []
-
-
for ref in references:
-
# If we already have a target_entry_id, keep the reference as-is
-
if ref.target_entry_id is not None:
-
resolved_refs.append(ref)
-
continue
-
-
# If we don't have a target_username, we can't resolve it
-
if ref.target_username is None:
-
resolved_refs.append(ref)
-
continue
-
-
# Try to find the entry by matching the URL
-
entries = git_store.list_entries(ref.target_username)
-
resolved_entry_id = None
-
-
for entry in entries:
-
# Check if the entry's link matches the target URL
-
if entry.link and str(entry.link) == ref.target_url:
-
resolved_entry_id = entry.id
-
break
-
-
# Create a new reference with the resolved target_entry_id
-
resolved_ref = BlogReference(
-
source_entry_id=ref.source_entry_id,
-
source_username=ref.source_username,
-
target_url=ref.target_url,
-
target_username=ref.target_username,
-
target_entry_id=resolved_entry_id,
-
)
-
resolved_refs.append(resolved_ref)
-
-
return resolved_refs
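With this module deleted, the in-memory outbound/inbound indexes it maintained are superseded by the unified links.json described in ARCH.md, which stores both directions on disk. A minimal sketch of the equivalent bidirectional lookup against that file (field names follow the unified format; the load path is an assumption):

```python
import json
from pathlib import Path

def load_links(repo_path: Path) -> dict:
    """Load the unified link index from the Git store root."""
    with open(repo_path / "links.json") as f:
        return json.load(f)

def entries_referencing(links: dict, url: str) -> list[str]:
    """URL -> IDs of entries that link to it (inbound)."""
    return links["links"].get(url, {}).get("referencing_entries", [])

def urls_referenced_by(links: dict, entry_id: str) -> list[str]:
    """Entry ID -> outbound URLs, via the reverse mapping."""
    return links["reverse_mapping"].get(entry_id, [])
```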
+24
src/thicket/models/config.py
···
git_store: Path
cache_dir: Path
users: list[UserConfig] = []
+
+
def find_user(self, username: str) -> Optional[UserConfig]:
+
"""Find a user by username."""
+
for user in self.users:
+
if user.username == username:
+
return user
+
return None
+
+
def add_user(self, user: UserConfig) -> bool:
+
"""Add a user to the configuration. Returns True if added, False if already exists."""
+
if self.find_user(user.username) is not None:
+
return False
+
self.users.append(user)
+
return True
+
+
def add_feed_to_user(self, username: str, feed_url: HttpUrl) -> bool:
+
"""Add a feed to an existing user. Returns True if added, False if user not found or feed already exists."""
+
user = self.find_user(username)
+
if user is None:
+
return False
+
if feed_url in user.feeds:
+
return False
+
user.feeds.append(feed_url)
+
return True
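A quick usage sketch of the new config helpers — the enclosing model's name, the import path, and `UserConfig`'s constructor are assumptions, but the return-value semantics follow the docstrings above:

```python
from pathlib import Path

from pydantic import HttpUrl

from thicket.models.config import ThicketConfig, UserConfig  # assumed names and path

config = ThicketConfig(git_store=Path("./git-store"), cache_dir=Path("./cache"))
config.add_user(UserConfig(username="user1", feeds=[]))  # True: new user
config.add_user(UserConfig(username="user1", feeds=[]))  # False: username taken
config.add_feed_to_user("user1", HttpUrl("https://blog.user.com/atom.xml"))  # True
config.add_feed_to_user("user2", HttpUrl("https://example.com/feed.xml"))    # False: no such user
```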
+2 -2
src/thicket/models/feed.py
···
"""Feed and entry models for thicket."""
from datetime import datetime
-
from typing import TYPE_CHECKING, Optional
+
from typing import TYPE_CHECKING, Any, Optional
from pydantic import BaseModel, ConfigDict, EmailStr, HttpUrl
···
summary: Optional[str] = None
content: Optional[str] = None # Full body content from Atom entry
content_type: Optional[str] = "html" # text, html, xhtml
-
author: Optional[dict] = None
+
author: Optional[dict[str, Any]] = None
categories: list[str] = []
rights: Optional[str] = None # Copyright info
source: Optional[str] = None # Source feed URL
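The tightened `dict[str, Any]` annotation matches the shape `_extract_author` produces in feed_parser.py. For illustration (hypothetical values):

```python
from typing import Any

author: dict[str, Any] = {
    "name": "user1",                  # from entry.author_detail.name
    "email": "user1@example.com",     # from entry.author_detail.email
    "uri": "https://blog.user.com/",  # from entry.author_detail.href
}
```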
+1 -3
src/thicket/models/user.py
···
class GitStoreIndex(BaseModel):
"""Index of all users and their directories in the Git store."""
-
model_config = ConfigDict(
-
json_encoders={datetime: lambda v: v.isoformat()}
-
)
+
model_config = ConfigDict(json_encoders={datetime: lambda v: v.isoformat()})
users: dict[str, UserMetadata] = {} # username -> UserMetadata
created: datetime
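The collapsed `model_config` keeps the same behavior: datetimes are emitted as ISO-8601 strings when the index is dumped to JSON. A self-contained illustration with a hypothetical `Stamp` model (note that Pydantic v2 serializes `datetime` to ISO-8601 in JSON mode even without the custom encoder):

```python
from datetime import datetime

from pydantic import BaseModel, ConfigDict

class Stamp(BaseModel):
    model_config = ConfigDict(json_encoders={datetime: lambda v: v.isoformat()})
    created: datetime

print(Stamp(created=datetime(2024, 6, 1)).model_dump_json())
# {"created":"2024-06-01T00:00:00"}
```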
+9 -1
uv.lock
···
version = 1
-
revision = 2
+
revision = 3
requires-python = ">=3.9"
resolution-markers = [
"python_full_version >= '3.10'",
···
{ name = "types-pyyaml" },
]
+
[package.dev-dependencies]
+
dev = [
+
{ name = "pytest" },
+
]
+
[package.metadata]
requires-dist = [
{ name = "black", marker = "extra == 'dev'", specifier = ">=24.0.0" },
···
{ name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.0" },
]
provides-extras = ["dev"]
+
+
[package.metadata.requires-dev]
+
dev = [{ name = "pytest", specifier = ">=8.4.1" }]
[[package]]
name = "tomli"