Manage Atom feeds in a persistent git repository
# Code Duplication Analysis for Thicket

## 1. Duplicate JSON Handling Code

### Pattern: JSON file reading/writing

Locations:

- `src/thicket/cli/commands/generate.py:230` - Reading JSON with `json.load(f)`
- `src/thicket/cli/commands/generate.py:249` - Reading links.json
- `src/thicket/cli/commands/index.py:2305` - Reading JSON
- `src/thicket/cli/commands/index.py:2320` - Writing JSON with `json.dump()`
- `src/thicket/cli/commands/threads.py:2456` - Reading JSON
- `src/thicket/cli/commands/info.py:2683` - Reading JSON
- `src/thicket/core/git_store.py:5546` - Writing JSON with custom serializer
- `src/thicket/core/git_store.py:5556` - Reading JSON
- `src/thicket/core/git_store.py:5566` - Writing JSON
- `src/thicket/core/git_store.py:5656` - Writing JSON with model dump
Recommendation: Create a shared json_utils.py module:
def read_json_file(path: Path) -> dict:
    """Read and parse the JSON document stored at ``path``.

    No errors are handled here; failures propagate to the caller.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded top-level JSON value (annotated as ``dict``).

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
def write_json_file(path: Path, data: dict, indent: int = 2) -> None:
    """Serialize ``data`` as JSON and write it to ``path``.

    Args:
        path: Destination file; it is opened in ``"w"`` mode, so existing
            content is replaced.
        data: The value to serialize.
        indent: Indentation width passed to ``json.dump``.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Write UTF-8 explicitly so the output does not depend on the platform's
    # locale encoding.
    with open(path, "w", encoding="utf-8") as f:
        # default=str turns any value json cannot encode natively (e.g.
        # datetime) into its str() form instead of raising TypeError.
        json.dump(data, f, indent=indent, default=str)
def write_model_json(path: Path, model: BaseModel, indent: int = 2) -> None:
    """Dump a Pydantic model to ``path`` as JSON.

    Args:
        path: Destination file; it is opened in ``"w"`` mode, so existing
            content is replaced.
        model: The model to serialize. Fields whose value is ``None`` are
            omitted (``exclude_none=True``).
        indent: Indentation width passed to ``json.dump``.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    payload = model.model_dump(mode="json", exclude_none=True)
    # Write UTF-8 explicitly so the output does not depend on the platform's
    # locale encoding.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=indent, default=str)
## 2. Repeated Datetime Handling

### Pattern: datetime formatting and fallback handling

Locations:

- `src/thicket/cli/commands/generate.py:241` - `key=lambda x: x[1].updated or x[1].published or datetime.min`
- `src/thicket/cli/commands/generate.py:353` - Same pattern in thread sorting
- `src/thicket/cli/commands/generate.py:359` - Same pattern for max date
- `src/thicket/cli/commands/generate.py:625` - Same pattern
- `src/thicket/cli/commands/generate.py:655` - `entry.updated or entry.published or datetime.min`
- `src/thicket/cli/commands/generate.py:689` - Same pattern
- `src/thicket/cli/commands/generate.py:702` - Same pattern
- Multiple `.strftime('%Y-%m-%d')` calls throughout
Recommendation: Create a shared datetime_utils.py module:
def get_entry_date(entry: AtomEntry) -> datetime:
    """Return the most relevant date of ``entry``.

    Preference order: ``entry.updated``, then ``entry.published``, then
    ``datetime.min`` when both are falsy.

    Args:
        entry: The entry whose date is wanted.

    Returns:
        The first truthy value among the candidates listed above.
    """
    # NOTE(review): datetime.min is timezone-naive. If entry dates are
    # timezone-aware, comparing or sorting a mix of the two raises
    # TypeError; confirm which kind AtomEntry stores.
    return entry.updated or entry.published or datetime.min
def format_date_short(dt: datetime) -> str:
    """Format ``dt`` as ``YYYY-MM-DD``.

    Args:
        dt: The datetime to format.

    Returns:
        The zero-padded calendar date, e.g. ``"2024-03-05"``.
    """
    # Built by hand rather than via strftime('%Y-%m-%d'): the width of %Y for
    # years below 1000 (such as datetime.min) varies between platforms,
    # whereas explicit padding always yields four digits.
    return f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d}"
def format_date_full(dt: datetime) -> str:
    """Format ``dt`` as ``YYYY-MM-DD HH:MM``.

    Args:
        dt: The datetime to format.

    Returns:
        The zero-padded date and 24-hour time, e.g. ``"2024-03-05 14:07"``.
    """
    # Built by hand rather than via strftime: the width of %Y for years below
    # 1000 (such as datetime.min) varies between platforms, whereas explicit
    # padding always yields four digits.
    return (
        f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d} "
        f"{dt.hour:02d}:{dt.minute:02d}"
    )
def format_date_iso(dt: datetime) -> str:
    """Format ``dt`` as an ISO 8601 string.

    Args:
        dt: The datetime to format.

    Returns:
        The result of ``dt.isoformat()``, e.g. ``"2024-03-05T14:07:00"``.
    """
    return dt.isoformat()
## 3. Path Handling Patterns

### Pattern: Directory creation and existence checks

Locations:

- `src/thicket/cli/commands/generate.py:225` - `if user_dir.exists()`
- `src/thicket/cli/commands/generate.py:247` - `if links_file.exists()`
- `src/thicket/cli/commands/generate.py:582` - `self.output_dir.mkdir(parents=True, exist_ok=True)`
- `src/thicket/cli/commands/generate.py:585-586` - Multiple mkdir calls
- `src/thicket/cli/commands/threads.py:2449` - `if not index_path.exists()`
- `src/thicket/cli/commands/info.py:2681` - `if links_path.exists()`
- `src/thicket/core/git_store.py:5515` - `if not self.repo_path.exists()`
- `src/thicket/core/git_store.py:5586` - `user_dir.mkdir(exist_ok=True)`
- Many more similar patterns
Recommendation: Create a shared path_utils.py module:
def ensure_directory(path: Path) -> Path:
    """Create ``path`` as a directory if it does not exist yet.

    Missing parent directories are created too, and an already existing
    directory is not an error.

    Args:
        path: The directory to create.

    Returns:
        The same ``path`` object, so the call can be used inline.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    path.mkdir(parents=True, exist_ok=True)
    return path
def read_json_if_exists(path: Path, default: Any = None) -> Any:
    """Read the JSON file at ``path``, or return ``default`` if it is missing.

    Args:
        path: Location of the JSON file.
        default: Value returned when the file does not exist.

    Returns:
        The decoded JSON value, or ``default`` when there is no such file.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # Open first and handle absence afterwards: a separate exists() check
    # leaves a window in which the file can vanish before open().
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a parent component that is a regular
        # file, which Path.exists() also reports as "does not exist".
        return default
def safe_path_join(*parts: Union[str, Path]) -> Path:
    """Join path components into a single ``Path``.

    This is a thin wrapper around ``Path(*parts)`` and performs no
    validation: ``..`` segments are kept as given, and an absolute component
    replaces everything before it, as pathlib always does. It is not a guard
    against path traversal.

    Args:
        *parts: Path components, as strings or ``Path`` objects.

    Returns:
        The joined path; ``Path()`` (the current directory) when no parts
        are given.
    """
    return Path(*parts)
## 4. Progress Bar and Console Output

### Pattern: Progress bar creation and updates

Locations:

- `src/thicket/cli/commands/generate.py:209` - Progress with SpinnerColumn
- `src/thicket/cli/commands/index.py:2230` - Same Progress pattern
- Multiple `console.print()` calls with similar formatting patterns
- Progress update patterns repeated
Recommendation: Create a shared ui_utils.py module:
def create_progress_spinner(description: str) -> tuple[Progress, TaskID]:
    """Build the standard spinner-style progress display with one task.

    The ``Progress`` object is returned without being started; starting and
    stopping it is left to the caller.

    Args:
        description: Text shown next to the spinner for the created task.

    Returns:
        A ``(progress, task_id)`` pair, where ``task_id`` identifies the task
        added for ``description``.
    """
    progress = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    )
    task = progress.add_task(description)
    return progress, task
def print_success(message: str) -> None:
    """Print ``message`` to the console, prefixed with a green check mark.

    Args:
        message: The text to display.
    """
    # NOTE(review): message is interpolated into a markup string, so square
    # brackets inside it are presumably parsed as markup; confirm whether
    # callers need it escaped.
    console.print(f"[green]✓[/green] {message}")
def print_error(message: str) -> None:
    """Print ``message`` to the console in red, prefixed with ``Error: ``.

    Args:
        message: The text to display.
    """
    # NOTE(review): message is interpolated into a markup string, so square
    # brackets inside it are presumably parsed as markup; confirm whether
    # callers need it escaped.
    console.print(f"[red]Error: {message}[/red]")
def print_warning(message: str) -> None:
    """Print ``message`` to the console in yellow, prefixed with ``Warning: ``.

    Args:
        message: The text to display.
    """
    # NOTE(review): message is interpolated into a markup string, so square
    # brackets inside it are presumably parsed as markup; confirm whether
    # callers need it escaped.
    console.print(f"[yellow]Warning: {message}[/yellow]")
## 5. Git Store Operations

### Pattern: Entry file operations
Locations:
- Multiple patterns of loading entries from user directories
- Repeated safe_id generation
- Repeated user directory path construction
Recommendation: Enhance GitStore with helper methods:
def get_user_dir(self, username: str) -> Path:
    """Return the directory for ``username`` inside the repository.

    The path is only computed (``self.repo_path / username``); nothing is
    created or checked on disk.

    Args:
        username: Name of the user, used as the directory name.

    Returns:
        The user's directory path.
    """
    return self.repo_path / username
def iter_user_entries(self, username: str) -> Iterator[tuple[Path, AtomEntry]]:
    """Iterate over all readable entries stored for ``username``.

    Every ``*.json`` file in the user's directory is treated as an entry,
    except ``index.json`` and ``duplicates.json``. Files that fail to load
    are skipped (best effort) and the failure is logged at debug level.
    Nothing is yielded when the user directory does not exist.

    Args:
        username: Name of the user whose entries are wanted.

    Yields:
        ``(entry_file, entry)`` pairs: the file's path and the value returned
        by ``self.read_entry_file`` for it.
    """
    import logging

    user_dir = self.get_user_dir(username)
    if not user_dir.exists():
        return

    non_entry_names = ("index.json", "duplicates.json")
    for entry_file in user_dir.glob("*.json"):
        if entry_file.name in non_entry_names:
            continue
        try:
            entry = self.read_entry_file(entry_file)
        except Exception:
            # Deliberately best effort: one bad file must not hide the rest.
            logging.getLogger(__name__).debug(
                "Skipping unreadable entry file %s", entry_file, exc_info=True
            )
            continue
        # Yield outside the try so that only read failures are suppressed,
        # never an exception thrown into the generator by its consumer.
        yield entry_file, entry
## 6. Error Handling Patterns

### Pattern: Try-except with console error printing

Locations:

- Similar error handling patterns throughout CLI commands
- Repeated `raise typer.Exit(1)` patterns
- Similar exception message formatting
Recommendation: Create error handling decorators:
def handle_cli_errors(func):
    """Decorator that reports CLI command errors consistently.

    Exceptions raised by the wrapped command are printed to the console in
    red and converted into ``typer.Exit(1)``. ``typer.Exit`` and
    ``typer.Abort`` raised by the command itself pass through untouched.

    Args:
        func: The CLI command function to wrap.

    Returns:
        The wrapped function; it returns whatever ``func`` returns.

    Raises:
        typer.Exit: With code 1 when ``func`` raises any other exception.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (typer.Exit, typer.Abort):
            # Both are Exception subclasses used for deliberate control flow.
            # Without this clause the generic handler below would turn a
            # clean `raise typer.Exit(0)` into an error exit.
            raise
        except ValidationError as e:
            console.print(f"[red]Validation error: {e}[/red]")
            raise typer.Exit(1) from e
        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            # The traceback is shown only when the command was invoked with
            # a truthy `verbose` keyword argument.
            if kwargs.get('verbose'):
                console.print_exception()
            raise typer.Exit(1) from e
    return wrapper
## 7. Configuration and Validation

### Pattern: Config file loading and validation
Locations:
- Repeated config loading pattern in every CLI command
- Similar validation patterns for URLs and paths
Recommendation: Create a config_utils.py module:
def load_config_with_defaults(config_path: Optional[Path] = None) -> ThicketConfig:
    """Load the configuration, defaulting to ``thicket.yaml``.

    Args:
        config_path: Path of the configuration file. When ``None``,
            ``thicket.yaml`` (relative to the current working directory)
            is used.

    Returns:
        The value returned by ``load_config`` for the resolved path.

    Raises:
        ConfigError: If the configuration file does not exist.
    """
    if config_path is None:
        config_path = Path("thicket.yaml")
    # Fail early with a message that names the missing path.
    if not config_path.exists():
        raise ConfigError(f"Configuration file not found: {config_path}")
    return load_config(config_path)
def validate_url(url: str) -> HttpUrl:
    """Validate ``url`` and return it as an ``HttpUrl``.

    Args:
        url: The URL string to validate.

    Returns:
        The result of ``HttpUrl(url)``.

    Raises:
        ConfigError: If validation raises ``ValidationError``; the original
            error is attached as the cause.
    """
    # NOTE(review): this assumes HttpUrl can be called directly as a
    # validating constructor; confirm against the Pydantic version in use.
    try:
        return HttpUrl(url)
    except ValidationError as err:
        # Chain the cause so the underlying validation detail is not lost.
        raise ConfigError(f"Invalid URL: {url}") from err
## 8. Model Serialization

### Pattern: Pydantic model JSON encoding

Locations:

- Repeated `json_encoders={datetime: lambda v: v.isoformat()}` in model configs
- Similar model_dump patterns
Recommendation: Create base model class:
class ThicketBaseModel(BaseModel):
    """Base model carrying the configuration shared by Thicket models."""

    # datetime values are encoded via isoformat(), and surrounding whitespace
    # is stripped from string fields.
    # NOTE(review): `json_encoders` is presumably deprecated in Pydantic v2;
    # confirm whether it is still needed alongside model_dump(mode="json").
    model_config = ConfigDict(
        json_encoders={datetime: lambda v: v.isoformat()},
        str_strip_whitespace=True,
    )

    def to_json_dict(self) -> dict:
        """Return the model as a JSON-serializable dict.

        Fields whose value is ``None`` are omitted (``exclude_none=True``).
        """
        return self.model_dump(mode="json", exclude_none=True)
## Summary of Refactoring Benefits
- Reduced Code Duplication: Eliminate 30-40% of duplicate code
- Consistent Error Handling: Standardize error messages and handling
- Easier Maintenance: Central location for common patterns
- Better Testing: Easier to unit test shared utilities
- Type Safety: Shared type hints and validation
- Performance: Potential to optimize common operations in one place
## Implementation Priority

1. High Priority:
   - JSON utilities (used everywhere)
   - Datetime utilities (critical for sorting and display)
   - Error handling decorators (improves UX consistency)
2. Medium Priority:
   - Path utilities
   - UI/Console utilities
   - Config utilities
3. Low Priority:
   - Base model classes (requires more refactoring)
   - Git store enhancements (already well-structured)