# A community based topic aggregation platform built on atproto
"""
Tests for Rich Text Formatter.

Tests conversion of KagiStory to Coves rich text format with facets.
"""
6import pytest
7from datetime import datetime
8
9from src.richtext_formatter import RichTextFormatter
10from src.models import KagiStory, Perspective, Quote, Source
11
12
@pytest.fixture
def sample_story():
    """Return a fully populated KagiStory used as shared test input.

    The story carries two highlights, two perspectives, a quote, two
    sources and an image, so every optional field is set.
    """
    # Nested objects are assembled first so the final constructor call
    # reads as a flat list of fields.
    trump_perspective = Perspective(
        actor="President Trump",
        description="He said his first question to President Xi would be about fentanyl.",
        source_url="https://www.straitstimes.com/world/test",
    )
    white_house_perspective = Perspective(
        actor="White House (press secretary)",
        description="Karoline Leavitt confirmed the bilateral meeting.",
        source_url="https://www.scmp.com/news/test",
    )

    trump_quote = Quote(
        text="Work out a lot of our doubts and questions",
        attribution="President Trump",
    )

    straits_times = Source(
        title="Trump to meet Xi in South Korea",
        url="https://www.straitstimes.com/world/test",
        domain="straitstimes.com",
    )
    scmp = Source(
        title="Trump meeting Xi next Thursday",
        url="https://www.scmp.com/news/test",
        domain="scmp.com",
    )

    story_highlights = [
        "Itinerary details: The Asia swing begins in Malaysia, continues to Japan.",
        "APEC context: US officials indicated the leaders will meet on the sidelines.",
    ]

    return KagiStory(
        title="Trump to meet Xi in South Korea",
        link="https://kite.kagi.com/test/world/10",
        guid="https://kite.kagi.com/test/world/10",
        pub_date=datetime(2025, 10, 23, 20, 56, 0),
        categories=["World", "World/Diplomacy"],
        summary="The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30.",
        highlights=story_highlights,
        perspectives=[trump_perspective, white_house_perspective],
        quote=trump_quote,
        sources=[straits_times, scmp],
        image_url="https://kagiproxy.com/img/test123",
        image_alt="Test image",
    )
58
59
class TestRichTextFormatter:
    """Test suite for RichTextFormatter.format_full.

    Each test formats a KagiStory and inspects the returned mapping: the
    rendered text under ``'content'`` and a list of facets under
    ``'facets'``.  As accessed by these tests, a facet is a dict with an
    ``'index'`` holding ``byteStart``/``byteEnd`` and a ``'features'`` list
    whose entries carry a ``'$type'`` and, for links, a ``'uri'``.

    The tests treat facet offsets as UTF-8 byte offsets into the content,
    so every position derived from ``str.find`` is converted to bytes
    before it is compared with a facet index.
    """

    # Feature type identifiers the tests filter on.
    _BOLD = 'social.coves.richtext.facet#bold'
    _ITALIC = 'social.coves.richtext.facet#italic'
    _LINK = 'social.coves.richtext.facet#link'

    @staticmethod
    def _facets_with_feature(facets, feature_type):
        """Return the facets that have at least one feature of *feature_type*."""
        return [
            facet for facet in facets
            if any(feat.get('$type') == feature_type
                   for feat in facet['features'])
        ]

    @staticmethod
    def _byte_span(content, needle, search_from=0):
        """Locate *needle* in *content* and return its UTF-8 byte span.

        Returns a ``(byte_start, byte_end)`` tuple, or ``None`` when
        *needle* does not occur at or after character index *search_from*.
        """
        char_pos = content.find(needle, search_from)
        if char_pos == -1:
            return None
        byte_start = len(content[:char_pos].encode('utf-8'))
        byte_end = byte_start + len(needle.encode('utf-8'))
        return byte_start, byte_end

    @staticmethod
    def _is_covered(facets, byte_start, byte_end):
        """Return True if any facet spans the whole byte range."""
        return any(
            facet['index']['byteStart'] <= byte_start and
            facet['index']['byteEnd'] >= byte_end
            for facet in facets
        )

    def test_format_full_returns_content_and_facets(self, sample_story):
        """Test that format_full returns content and facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        assert 'content' in result
        assert 'facets' in result
        assert isinstance(result['content'], str)
        assert isinstance(result['facets'], list)

    def test_content_structure(self, sample_story):
        """Test that content has correct structure."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)
        content = result['content']

        # Check all sections are present
        assert sample_story.summary in content
        assert "Highlights:" in content
        assert "Perspectives:" in content
        assert "Sources:" in content
        assert sample_story.quote.text in content
        assert "📰 Story aggregated by Kagi News" in content

    def test_facets_for_bold_headers(self, sample_story):
        """Test that section headers have bold facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        bold_facets = self._facets_with_feature(result['facets'], self._BOLD)
        assert len(bold_facets) > 0

        # Facet indices are byte offsets, so the header position must be
        # converted from characters to bytes before comparing; otherwise
        # any multi-byte text ahead of the header skews the check.
        span = self._byte_span(result['content'], "Highlights:")
        assert span is not None, '"Highlights:" header not found in content'

        # Should have a bold facet covering "Highlights:"
        assert self._is_covered(bold_facets, *span)

    def test_facets_for_italic_quote(self, sample_story):
        """Test that quotes have italic facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        italic_facets = self._facets_with_feature(
            result['facets'], self._ITALIC)
        assert len(italic_facets) > 0

        # The quote text is wrapped with quotes, so search for that
        quote_with_quotes = f'"{sample_story.quote.text}"'
        span = self._byte_span(result['content'], quote_with_quotes)

        # Without this guard a failed search (-1) would yield meaningless
        # slice offsets and the coverage check could pass by accident.
        assert span is not None, "Quoted text not found in content"

        assert self._is_covered(italic_facets, *span)

    def test_facets_for_links(self, sample_story):
        """Test that URLs have link facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        link_facets = self._facets_with_feature(result['facets'], self._LINK)

        # Should have links for: 2 sources + 2 perspectives + 1 Kagi News link = 5 minimum
        assert len(link_facets) >= 5

        # Check that every source URL has a link facet
        linked_uris = {
            feat.get('uri')
            for facet in link_facets
            for feat in facet['features']
        }
        for source in sample_story.sources:
            url = source.url
            assert url in linked_uris, f"Missing link facet for {url}"

    def test_utf8_byte_positions(self):
        """Test UTF-8 byte position calculation with multi-byte characters."""
        # Create story with emoji and non-ASCII characters
        story = KagiStory(
            title="Test 👋 Story",
            link="https://test.com",
            guid="https://test.com",
            pub_date=datetime.now(),
            categories=["Test"],
            summary="Hello 世界 this is a test with emoji 🎉",
            highlights=["Test highlight"],
            perspectives=[],
            quote=None,
            sources=[],
        )

        formatter = RichTextFormatter()
        result = formatter.format_full(story)

        # Verify content contains the emoji
        assert "👋" in result['content'] or "🎉" in result['content']

        # Verify all facet byte positions are valid
        content_bytes = result['content'].encode('utf-8')
        for facet in result['facets']:
            start = facet['index']['byteStart']
            end = facet['index']['byteEnd']

            # Positions should be within bounds
            assert 0 <= start < len(content_bytes)
            assert start < end <= len(content_bytes)

            # Bounds alone would also hold for character offsets; a span
            # that splits a multi-byte character fails to decode, which
            # exposes offsets that were not computed in bytes.
            try:
                content_bytes[start:end].decode('utf-8')
            except UnicodeDecodeError:
                pytest.fail(
                    f"Facet {facet} does not fall on UTF-8 character boundaries"
                )

    def test_format_story_without_optional_fields(self):
        """Test formatting story with missing optional fields."""
        minimal_story = KagiStory(
            title="Minimal Story",
            link="https://test.com",
            guid="https://test.com",
            pub_date=datetime.now(),
            categories=["Test"],
            summary="Just a summary.",
            highlights=[],  # Empty
            perspectives=[],  # Empty
            quote=None,  # Missing
            sources=[],  # Empty
        )

        formatter = RichTextFormatter()
        result = formatter.format_full(minimal_story)

        # Should still have content and facets
        assert result['content']
        assert result['facets']

        # Should have summary
        assert "Just a summary." in result['content']

        # Should NOT have empty sections
        assert "Highlights:" not in result['content']
        assert "Perspectives:" not in result['content']

    def test_perspective_actor_is_bolded(self, sample_story):
        """Test that perspective actor names are bolded."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        content = result['content']
        bold_facets = self._facets_with_feature(result['facets'], self._BOLD)

        # Find "President Trump:" in perspectives section
        actor = "President Trump:"
        perspectives_start = content.find("Perspectives:")
        span = self._byte_span(content, actor, perspectives_start)

        if span is None:
            # Previously the test passed silently here; skipping keeps it
            # non-fatal while making the unexercised assertion visible.
            pytest.skip(f'"{actor}" not found in the Perspectives section')

        assert self._is_covered(bold_facets, *span)

    def test_kagi_attribution_link(self, sample_story):
        """Test that Kagi News attribution has a link to the story."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        # Should have link to Kagi story
        link_facets = self._facets_with_feature(result['facets'], self._LINK)

        # Find link to the Kagi story URL
        kagi_link = any(
            any(feat.get('uri') == sample_story.link
                for feat in facet['features'])
            for facet in link_facets
        )
        assert kagi_link, "Missing link to Kagi story in attribution"

    def test_facets_do_not_overlap(self, sample_story):
        """Test that facets with same feature type don't overlap."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        # Group facets by type
        facets_by_type = {}
        for facet in result['facets']:
            for feature in facet['features']:
                facets_by_type.setdefault(feature['$type'], []).append(facet)

        # Check for overlaps within each type
        for ftype, facets in facets_by_type.items():
            for i, f1 in enumerate(facets):
                start1, end1 = f1['index']['byteStart'], f1['index']['byteEnd']
                for f2 in facets[i + 1:]:
                    start2, end2 = f2['index']['byteStart'], f2['index']['byteEnd']

                    # Two half-open ranges overlap when each one starts
                    # before the other ends.
                    overlaps = (start1 < end2 and start2 < end1)
                    assert not overlaps, f"Overlapping facets of type {ftype}: {f1} and {f2}"