"""Tests for feed parser functionality.""" from pydantic import HttpUrl from thicket.core.feed_parser import FeedParser from thicket.models import AtomEntry, FeedMetadata class TestFeedParser: """Test the FeedParser class.""" def test_init(self): """Test parser initialization.""" parser = FeedParser() assert parser.user_agent == "thicket/0.1.0" assert "a" in parser.allowed_tags assert "href" in parser.allowed_attributes["a"] def test_parse_atom_feed(self, sample_atom_feed): """Test parsing an Atom feed.""" parser = FeedParser() metadata, entries = parser.parse_feed(sample_atom_feed) # Check metadata assert isinstance(metadata, FeedMetadata) assert metadata.title == "Test Feed" assert metadata.author_name == "Test Author" assert metadata.author_email == "author@example.com" assert metadata.link == HttpUrl("https://example.com/") # Check entries assert len(entries) == 1 entry = entries[0] assert isinstance(entry, AtomEntry) assert entry.title == "Test Entry" assert entry.id == "https://example.com/entry/1" assert entry.link == HttpUrl("https://example.com/entry/1") assert entry.summary == "This is a test entry." assert "

This is the content of the test entry.

" in entry.content def test_parse_rss_feed(self, sample_rss_feed): """Test parsing an RSS feed.""" parser = FeedParser() metadata, entries = parser.parse_feed(sample_rss_feed) # Check metadata assert isinstance(metadata, FeedMetadata) assert metadata.title == "Test RSS Feed" assert metadata.link == HttpUrl("https://example.com/") assert metadata.author_email == "editor@example.com" # Check entries assert len(entries) == 1 entry = entries[0] assert isinstance(entry, AtomEntry) assert entry.title == "Test RSS Entry" assert entry.id == "https://example.com/rss/entry/1" assert entry.summary == "This is a test RSS entry." def test_sanitize_entry_id(self): """Test entry ID sanitization.""" parser = FeedParser() # Test URL ID url_id = "https://example.com/posts/2025/01/test-post" sanitized = parser.sanitize_entry_id(url_id) assert sanitized == "posts_2025_01_test-post" # Test problematic characters bad_id = "test/with\\bad:chars|and" sanitized = parser.sanitize_entry_id(bad_id) assert sanitized == "test_with_bad_chars_and_more_" # Test empty ID empty_id = "" sanitized = parser.sanitize_entry_id(empty_id) assert sanitized == "entry" # Test very long ID long_id = "a" * 300 sanitized = parser.sanitize_entry_id(long_id) assert len(sanitized) == 200 def test_sanitize_html(self): """Test HTML sanitization.""" parser = FeedParser() # Test allowed tags safe_html = "

This is safe HTML

" sanitized = parser._sanitize_html(safe_html) assert sanitized == safe_html # Test dangerous tags dangerous_html = "

Safe content

" sanitized = parser._sanitize_html(dangerous_html) assert "