aggregators/kagi-news/tests/test_richtext_formatter.py at main · bretton.dev/coves

bretton.dev / coves
A community based topic aggregation platform built on atproto
coves / aggregators / kagi-news / tests / test_richtext_formatter.py
at main 11 kB view raw
  1"""
  2Tests for Rich Text Formatter.
  3
  4Tests conversion of KagiStory to Coves rich text format with facets.
  5"""
  6import pytest
  7from datetime import datetime
  8
  9from src.richtext_formatter import RichTextFormatter
 10from src.models import KagiStory, Perspective, Quote, Source
 11
 12
 13@pytest.fixture
 14def sample_story():
 15    """Create a sample KagiStory for testing."""
 16    return KagiStory(
 17        title="Trump to meet Xi in South Korea",
 18        link="https://kite.kagi.com/test/world/10",
 19        guid="https://kite.kagi.com/test/world/10",
 20        pub_date=datetime(2025, 10, 23, 20, 56, 0),
 21        categories=["World", "World/Diplomacy"],
 22        summary="The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30.",
 23        highlights=[
 24            "Itinerary details: The Asia swing begins in Malaysia, continues to Japan.",
 25            "APEC context: US officials indicated the leaders will meet on the sidelines."
 26        ],
 27        perspectives=[
 28            Perspective(
 29                actor="President Trump",
 30                description="He said his first question to President Xi would be about fentanyl.",
 31                source_url="https://www.straitstimes.com/world/test"
 32            ),
 33            Perspective(
 34                actor="White House (press secretary)",
 35                description="Karoline Leavitt confirmed the bilateral meeting.",
 36                source_url="https://www.scmp.com/news/test"
 37            )
 38        ],
 39        quote=Quote(
 40            text="Work out a lot of our doubts and questions",
 41            attribution="President Trump"
 42        ),
 43        sources=[
 44            Source(
 45                title="Trump to meet Xi in South Korea",
 46                url="https://www.straitstimes.com/world/test",
 47                domain="straitstimes.com"
 48            ),
 49            Source(
 50                title="Trump meeting Xi next Thursday",
 51                url="https://www.scmp.com/news/test",
 52                domain="scmp.com"
 53            )
 54        ],
 55        image_url="https://kagiproxy.com/img/test123",
 56        image_alt="Test image"
 57    )
 58
 59
 60class TestRichTextFormatter:
 61    """Test suite for RichTextFormatter."""
 62
 63    def test_format_full_returns_content_and_facets(self, sample_story):
 64        """Test that format_full returns content and facets."""
 65        formatter = RichTextFormatter()
 66        result = formatter.format_full(sample_story)
 67
 68        assert 'content' in result
 69        assert 'facets' in result
 70        assert isinstance(result['content'], str)
 71        assert isinstance(result['facets'], list)
 72
 73    def test_content_structure(self, sample_story):
 74        """Test that content has correct structure."""
 75        formatter = RichTextFormatter()
 76        result = formatter.format_full(sample_story)
 77        content = result['content']
 78
 79        # Check all sections are present
 80        assert sample_story.summary in content
 81        assert "Highlights:" in content
 82        assert "Perspectives:" in content
 83        assert "Sources:" in content
 84        assert sample_story.quote.text in content
 85        assert "📰 Story aggregated by Kagi News" in content
 86
 87    def test_facets_for_bold_headers(self, sample_story):
 88        """Test that section headers have bold facets."""
 89        formatter = RichTextFormatter()
 90        result = formatter.format_full(sample_story)
 91
 92        # Find bold facets
 93        bold_facets = [
 94            f for f in result['facets']
 95            if any(feat.get('$type') == 'social.coves.richtext.facet#bold'
 96                   for feat in f['features'])
 97        ]
 98
 99        assert len(bold_facets) > 0
100
101        # Check that "Highlights:" is bolded
102        content = result['content']
103        highlights_pos = content.find("Highlights:")
104
105        # Should have a bold facet covering "Highlights:"
106        has_highlights_bold = any(
107            f['index']['byteStart'] <= highlights_pos and
108            f['index']['byteEnd'] >= highlights_pos + len("Highlights:")
109            for f in bold_facets
110        )
111        assert has_highlights_bold
112
113    def test_facets_for_italic_quote(self, sample_story):
114        """Test that quotes have italic facets."""
115        formatter = RichTextFormatter()
116        result = formatter.format_full(sample_story)
117
118        # Find italic facets
119        italic_facets = [
120            f for f in result['facets']
121            if any(feat.get('$type') == 'social.coves.richtext.facet#italic'
122                   for feat in f['features'])
123        ]
124
125        assert len(italic_facets) > 0
126
127        # The quote text is wrapped with quotes, so search for that
128        content = result['content']
129        quote_with_quotes = f'"{sample_story.quote.text}"'
130        quote_char_pos = content.find(quote_with_quotes)
131
132        # Convert character position to byte position
133        quote_byte_start = len(content[:quote_char_pos].encode('utf-8'))
134        quote_byte_end = len(content[:quote_char_pos + len(quote_with_quotes)].encode('utf-8'))
135
136        has_quote_italic = any(
137            f['index']['byteStart'] <= quote_byte_start and
138            f['index']['byteEnd'] >= quote_byte_end
139            for f in italic_facets
140        )
141        assert has_quote_italic
142
143    def test_facets_for_links(self, sample_story):
144        """Test that URLs have link facets."""
145        formatter = RichTextFormatter()
146        result = formatter.format_full(sample_story)
147
148        # Find link facets
149        link_facets = [
150            f for f in result['facets']
151            if any(feat.get('$type') == 'social.coves.richtext.facet#link'
152                   for feat in f['features'])
153        ]
154
155        # Should have links for: 2 sources + 2 perspectives + 1 Kagi News link = 5 minimum
156        assert len(link_facets) >= 5
157
158        # Check that first source URL has a link facet
159        source_urls = [s.url for s in sample_story.sources]
160        for url in source_urls:
161            has_link = any(
162                any(feat.get('uri') == url for feat in f['features'])
163                for f in link_facets
164            )
165            assert has_link, f"Missing link facet for {url}"
166
167    def test_utf8_byte_positions(self):
168        """Test UTF-8 byte position calculation with multi-byte characters."""
169        # Create story with emoji and non-ASCII characters
170        story = KagiStory(
171            title="Test 👋 Story",
172            link="https://test.com",
173            guid="https://test.com",
174            pub_date=datetime.now(),
175            categories=["Test"],
176            summary="Hello 世界 this is a test with emoji 🎉",
177            highlights=["Test highlight"],
178            perspectives=[],
179            quote=None,
180            sources=[],
181        )
182
183        formatter = RichTextFormatter()
184        result = formatter.format_full(story)
185
186        # Verify content contains the emoji
187        assert "👋" in result['content'] or "🎉" in result['content']
188
189        # Verify all facet byte positions are valid
190        content_bytes = result['content'].encode('utf-8')
191        for facet in result['facets']:
192            start = facet['index']['byteStart']
193            end = facet['index']['byteEnd']
194
195            # Positions should be within bounds
196            assert 0 <= start < len(content_bytes)
197            assert start < end <= len(content_bytes)
198
199    def test_format_story_without_optional_fields(self):
200        """Test formatting story with missing optional fields."""
201        minimal_story = KagiStory(
202            title="Minimal Story",
203            link="https://test.com",
204            guid="https://test.com",
205            pub_date=datetime.now(),
206            categories=["Test"],
207            summary="Just a summary.",
208            highlights=[],  # Empty
209            perspectives=[],  # Empty
210            quote=None,  # Missing
211            sources=[],  # Empty
212        )
213
214        formatter = RichTextFormatter()
215        result = formatter.format_full(minimal_story)
216
217        # Should still have content and facets
218        assert result['content']
219        assert result['facets']
220
221        # Should have summary
222        assert "Just a summary." in result['content']
223
224        # Should NOT have empty sections
225        assert "Highlights:" not in result['content']
226        assert "Perspectives:" not in result['content']
227
228    def test_perspective_actor_is_bolded(self, sample_story):
229        """Test that perspective actor names are bolded."""
230        formatter = RichTextFormatter()
231        result = formatter.format_full(sample_story)
232
233        content = result['content']
234        bold_facets = [
235            f for f in result['facets']
236            if any(feat.get('$type') == 'social.coves.richtext.facet#bold'
237                   for feat in f['features'])
238        ]
239
240        # Find "President Trump:" in perspectives section
241        actor = "President Trump:"
242        perspectives_start = content.find("Perspectives:")
243        actor_char_pos = content.find(actor, perspectives_start)
244
245        if actor_char_pos != -1:  # If found in perspectives
246            # Convert character position to byte position
247            actor_byte_start = len(content[:actor_char_pos].encode('utf-8'))
248            actor_byte_end = len(content[:actor_char_pos + len(actor)].encode('utf-8'))
249
250            has_actor_bold = any(
251                f['index']['byteStart'] <= actor_byte_start and
252                f['index']['byteEnd'] >= actor_byte_end
253                for f in bold_facets
254            )
255            assert has_actor_bold
256
257    def test_kagi_attribution_link(self, sample_story):
258        """Test that Kagi News attribution has a link to the story."""
259        formatter = RichTextFormatter()
260        result = formatter.format_full(sample_story)
261
262        # Should have link to Kagi story
263        link_facets = [
264            f for f in result['facets']
265            if any(feat.get('$type') == 'social.coves.richtext.facet#link'
266                   for feat in f['features'])
267        ]
268
269        # Find link to the Kagi story URL
270        kagi_link = any(
271            any(feat.get('uri') == sample_story.link for feat in f['features'])
272            for f in link_facets
273        )
274        assert kagi_link, "Missing link to Kagi story in attribution"
275
276    def test_facets_do_not_overlap(self, sample_story):
277        """Test that facets with same feature type don't overlap."""
278        formatter = RichTextFormatter()
279        result = formatter.format_full(sample_story)
280
281        # Group facets by type
282        facets_by_type = {}
283        for facet in result['facets']:
284            for feature in facet['features']:
285                ftype = feature['$type']
286                if ftype not in facets_by_type:
287                    facets_by_type[ftype] = []
288                facets_by_type[ftype].append(facet)
289
290        # Check for overlaps within each type
291        for ftype, facets in facets_by_type.items():
292            for i, f1 in enumerate(facets):
293                for f2 in facets[i+1:]:
294                    start1, end1 = f1['index']['byteStart'], f1['index']['byteEnd']
295                    start2, end2 = f2['index']['byteStart'], f2['index']['byteEnd']
296
297                    # Check if they overlap
298                    overlaps = (start1 < end2 and start2 < end1)
299                    assert not overlaps, f"Overlapping facets of type {ftype}: {f1} and {f2}"