"""Tests for the Kagi HTML description parser."""

import html
from datetime import datetime
from pathlib import Path

import pytest

from src.html_parser import KagiHTMLParser
from src.models import KagiStory, Perspective, Quote, Source


@pytest.fixture
def sample_html_description():
    """Provide the description payload of a sample RSS item.

    NOTE(review): as it appears here the text carries no HTML markup, while
    tests in this module expect an image URL and source links to be
    extracted from it -- confirm the markup is intact in the checked-in file.
    """
    # Stand-in for the content of the RSS description field.
    description = """

The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30, at the end of an Asia trip that includes Malaysia and Japan . The administration said the meeting will take place Thursday morning local time, and Mr Trump indicated his first question to Xi would concern fentanyl and other bilateral issues . The talks come amid heightened trade tensions after Beijing expanded export curbs on rare-earth minerals and following Mr Trump's recent threat of additional tariffs on Chinese goods, making the meeting a focal point for discussions on trade, technology supply chains and energy .

News image associated with coverage of President Trump's Asia trip and planned meeting with President Xi

Highlights:

Itinerary details: The Asia swing begins in Malaysia, continues to Japan and ends with the bilateral meeting in South Korea on Thursday morning local time, White House press secretary Karoline Leavitt said at a briefing .
APEC context: US officials indicated the leaders will meet on the sidelines of the Asia-Pacific Economic Cooperation gathering, shaping expectations for short, high-level talks rather than a lengthy summit .

Work out a lot of our doubts and questions - President Trump

Perspectives:

President Trump: He said his first question to President Xi would be about fentanyl and indicated he hoped to resolve bilateral doubts and questions in the talks. (The Straits Times)
White House (press secretary): Karoline Leavitt confirmed the bilateral meeting will occur Thursday morning local time during a White House briefing. (South China Morning Post)

Sources:

Trump to meet Xi in South Korea on Oct 30 as part of Asia swing - straitstimes.com
Trump to meet Xi in South Korea next Thursday as part of key Asia trip - scmp.com

"""
    return description


class TestKagiHTMLParser:
    """Checks each section KagiHTMLParser extracts from a description."""

    def test_parse_summary(self, sample_html_description):
        """The summary paragraph is returned under 'summary'."""
        parsed = KagiHTMLParser().parse(sample_html_description)
        summary = parsed['summary']

        assert summary.startswith("The White House confirmed President Trump")
        assert "bilateral meeting with Chinese President Xi Jinping" in summary

    def test_parse_image_url(self, sample_html_description):
        """The image URL and its alt text are both returned."""
        parsed = KagiHTMLParser().parse(sample_html_description)

        image_url = parsed['image_url']
        assert image_url is not None
        assert image_url.startswith("https://kagiproxy.com/img/")

        image_alt = parsed['image_alt']
        assert image_alt is not None
        assert "Trump" in image_alt

    def test_parse_highlights(self, sample_html_description):
        """Both highlight entries are returned, in document order."""
        parsed = KagiHTMLParser().parse(sample_html_description)
        highlights = parsed['highlights']

        assert len(highlights) == 2
        assert "Itinerary details" in highlights[0]
        assert "APEC context" in highlights[1]

    def test_parse_quote(self, sample_html_description):
        """The quote is split into its text and its attribution."""
        parsed = KagiHTMLParser().parse(sample_html_description)
        quote = parsed['quote']

        assert quote is not None
        assert quote['text'] == "Work out a lot of our doubts and questions"
        assert quote['attribution'] == "President Trump"

    def test_parse_perspectives(self, sample_html_description):
        """Each perspective exposes its actor, description and source URL."""
        parsed = KagiHTMLParser().parse(sample_html_description)
        perspectives = parsed['perspectives']

        assert len(perspectives) == 2

        # First perspective
        expected_source_url = (
            "https://www.straitstimes.com/world/united-states/trump-to-meet-xi-in-south-korea-on-oct-30-as-part-of-asia-swing"
        )
        leading = perspectives[0]
        assert leading['actor'] == "President Trump"
        assert "fentanyl" in leading['description']
        assert leading['source_url'] == expected_source_url

        # Second perspective
        trailing = perspectives[1]
        assert "White House" in trailing['actor']

    def test_parse_sources(self, sample_html_description):
        """Sources carry a title, a URL and a bare domain."""
        parsed = KagiHTMLParser().parse(sample_html_description)
        sources = parsed['sources']

        assert len(sources) >= 2

        # Only the first source is inspected in detail.
        top_source = sources[0]
        assert top_source['title'] == "Trump to meet Xi in South Korea on Oct 30 as part of Asia swing"
        assert top_source['url'].startswith("https://www.straitstimes.com")
        assert top_source['domain'] == "straitstimes.com"

    def test_parse_missing_sections(self):
        """Absent sections come back as empty lists or None."""
        # The blank lines around the sentence are part of the input.
        html_minimal = "\n\nJust a summary, no other sections.\n\n"

        parsed = KagiHTMLParser().parse(html_minimal)

        assert parsed['summary'] == "Just a summary, no other sections."
        for list_key in ('highlights', 'perspectives', 'sources'):
            assert parsed[list_key] == []
        for optional_key in ('quote', 'image_url'):
            assert parsed[optional_key] is None

    def test_parse_to_kagi_story(self, sample_html_description):
        """parse_to_story builds a KagiStory from full RSS item data."""
        # Simulate full RSS item data
        rss_item = dict(
            title="Trump to meet Xi in South Korea on Oct 30",
            link="https://kite.kagi.com/test/world/10",
            guid="https://kite.kagi.com/test/world/10",
            pub_date=datetime(2025, 10, 23, 20, 56, 0),
            categories=["World", "World/Diplomacy"],
            html_description=sample_html_description,
        )

        story = KagiHTMLParser().parse_to_story(**rss_item)

        assert isinstance(story, KagiStory)
        assert story.title == "Trump to meet Xi in South Korea on Oct 30"
        assert story.link == "https://kite.kagi.com/test/world/10"
        assert len(story.highlights) == 2
        assert len(story.perspectives) == 2
        assert len(story.sources) >= 2
        assert story.quote is not None
        assert story.image_url is not None