A community-based topic aggregation platform built on atproto
at main 11 kB view raw
"""
Tests for Rich Text Formatter.

Tests conversion of KagiStory to Coves rich text format with facets.

The tests in this module read each facet as a dict with an ``index``
(``byteStart`` / ``byteEnd``) and a list of ``features`` dicts keyed by
``$type``. Facet indices are compared as UTF-8 byte offsets, so character
positions obtained from ``str.find`` are converted before comparison.
"""
from datetime import datetime
from itertools import combinations

import pytest

from src.richtext_formatter import RichTextFormatter
from src.models import KagiStory, Perspective, Quote, Source


# Feature ``$type`` identifiers checked by the tests below.
BOLD_FACET = 'social.coves.richtext.facet#bold'
ITALIC_FACET = 'social.coves.richtext.facet#italic'
LINK_FACET = 'social.coves.richtext.facet#link'


def _facets_of_type(facets, facet_type):
    """Return the facets that carry at least one feature of *facet_type*."""
    return [
        facet for facet in facets
        if any(feat.get('$type') == facet_type for feat in facet['features'])
    ]


def _byte_span(content, needle, start=0):
    """Return ``(byte_start, byte_end)`` of *needle* within *content*.

    The search begins at character offset *start*. The returned offsets are
    measured in UTF-8 bytes so they can be compared directly with a facet's
    ``byteStart`` / ``byteEnd``. Fails the test if *needle* is absent, instead
    of letting a ``-1`` from ``str.find`` leak into the comparison.
    """
    char_pos = content.find(needle, start)
    assert char_pos != -1, f"{needle!r} not found in formatted content"
    byte_start = len(content[:char_pos].encode('utf-8'))
    byte_end = byte_start + len(needle.encode('utf-8'))
    return byte_start, byte_end


def _is_covered(facets, byte_start, byte_end):
    """Return True if any facet fully spans ``[byte_start, byte_end)``."""
    return any(
        facet['index']['byteStart'] <= byte_start
        and facet['index']['byteEnd'] >= byte_end
        for facet in facets
    )


def _has_link_to(link_facets, uri):
    """Return True if any facet in *link_facets* has a feature pointing at *uri*."""
    return any(
        any(feat.get('uri') == uri for feat in facet['features'])
        for facet in link_facets
    )


@pytest.fixture
def sample_story():
    """Create a sample KagiStory for testing."""
    return KagiStory(
        title="Trump to meet Xi in South Korea",
        link="https://kite.kagi.com/test/world/10",
        guid="https://kite.kagi.com/test/world/10",
        pub_date=datetime(2025, 10, 23, 20, 56, 0),
        categories=["World", "World/Diplomacy"],
        summary="The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30.",
        highlights=[
            "Itinerary details: The Asia swing begins in Malaysia, continues to Japan.",
            "APEC context: US officials indicated the leaders will meet on the sidelines."
        ],
        perspectives=[
            Perspective(
                actor="President Trump",
                description="He said his first question to President Xi would be about fentanyl.",
                source_url="https://www.straitstimes.com/world/test"
            ),
            Perspective(
                actor="White House (press secretary)",
                description="Karoline Leavitt confirmed the bilateral meeting.",
                source_url="https://www.scmp.com/news/test"
            )
        ],
        quote=Quote(
            text="Work out a lot of our doubts and questions",
            attribution="President Trump"
        ),
        sources=[
            Source(
                title="Trump to meet Xi in South Korea",
                url="https://www.straitstimes.com/world/test",
                domain="straitstimes.com"
            ),
            Source(
                title="Trump meeting Xi next Thursday",
                url="https://www.scmp.com/news/test",
                domain="scmp.com"
            )
        ],
        image_url="https://kagiproxy.com/img/test123",
        image_alt="Test image"
    )


class TestRichTextFormatter:
    """Test suite for RichTextFormatter."""

    def test_format_full_returns_content_and_facets(self, sample_story):
        """Test that format_full returns content and facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        assert 'content' in result
        assert 'facets' in result
        assert isinstance(result['content'], str)
        assert isinstance(result['facets'], list)

    def test_content_structure(self, sample_story):
        """Test that content has correct structure."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)
        content = result['content']

        # Check all sections are present
        assert sample_story.summary in content
        assert "Highlights:" in content
        assert "Perspectives:" in content
        assert "Sources:" in content
        assert sample_story.quote.text in content
        assert "📰 Story aggregated by Kagi News" in content

    def test_facets_for_bold_headers(self, sample_story):
        """Test that section headers have bold facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        bold_facets = _facets_of_type(result['facets'], BOLD_FACET)
        assert len(bold_facets) > 0

        # Facet indices are byte offsets, so the header's character position
        # is converted to bytes before checking for a covering bold facet.
        header_start, header_end = _byte_span(result['content'], "Highlights:")
        assert _is_covered(bold_facets, header_start, header_end)

    def test_facets_for_italic_quote(self, sample_story):
        """Test that quotes have italic facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        italic_facets = _facets_of_type(result['facets'], ITALIC_FACET)
        assert len(italic_facets) > 0

        # The quote text is wrapped with quotes, so search for that
        quote_with_quotes = f'"{sample_story.quote.text}"'
        quote_start, quote_end = _byte_span(result['content'], quote_with_quotes)
        assert _is_covered(italic_facets, quote_start, quote_end)

    def test_facets_for_links(self, sample_story):
        """Test that URLs have link facets."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        link_facets = _facets_of_type(result['facets'], LINK_FACET)

        # Should have links for: 2 sources + 2 perspectives + 1 Kagi News link = 5 minimum
        assert len(link_facets) >= 5

        # Check that every source URL has a link facet
        for source in sample_story.sources:
            assert _has_link_to(link_facets, source.url), (
                f"Missing link facet for {source.url}"
            )

    def test_utf8_byte_positions(self):
        """Test UTF-8 byte position calculation with multi-byte characters."""
        # Create story with emoji and non-ASCII characters
        story = KagiStory(
            title="Test 👋 Story",
            link="https://test.com",
            guid="https://test.com",
            pub_date=datetime.now(),
            categories=["Test"],
            summary="Hello 世界 this is a test with emoji 🎉",
            highlights=["Test highlight"],
            perspectives=[],
            quote=None,
            sources=[],
        )

        formatter = RichTextFormatter()
        result = formatter.format_full(story)

        # Verify content contains the emoji
        assert "👋" in result['content'] or "🎉" in result['content']

        # Verify all facet byte positions are valid
        content_bytes = result['content'].encode('utf-8')
        for facet in result['facets']:
            start = facet['index']['byteStart']
            end = facet['index']['byteEnd']

            # Positions should be within bounds
            assert 0 <= start < len(content_bytes)
            assert start < end <= len(content_bytes)

    def test_format_story_without_optional_fields(self):
        """Test formatting story with missing optional fields."""
        minimal_story = KagiStory(
            title="Minimal Story",
            link="https://test.com",
            guid="https://test.com",
            pub_date=datetime.now(),
            categories=["Test"],
            summary="Just a summary.",
            highlights=[],  # Empty
            perspectives=[],  # Empty
            quote=None,  # Missing
            sources=[],  # Empty
        )

        formatter = RichTextFormatter()
        result = formatter.format_full(minimal_story)

        # Should still have content and facets
        assert result['content']
        assert result['facets']

        # Should have summary
        assert "Just a summary." in result['content']

        # Should NOT have empty sections
        assert "Highlights:" not in result['content']
        assert "Perspectives:" not in result['content']

    def test_perspective_actor_is_bolded(self, sample_story):
        """Test that perspective actor names are bolded."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        content = result['content']
        bold_facets = _facets_of_type(result['facets'], BOLD_FACET)

        # Restrict the search to the perspectives section so an earlier
        # mention of the actor elsewhere in the content is not matched.
        perspectives_start = content.find("Perspectives:")
        assert perspectives_start != -1, "Perspectives section missing from content"

        # A missing actor label fails the test (inside _byte_span) rather
        # than silently skipping the bold check.
        actor_start, actor_end = _byte_span(
            content, "President Trump:", perspectives_start
        )
        assert _is_covered(bold_facets, actor_start, actor_end)

    def test_kagi_attribution_link(self, sample_story):
        """Test that Kagi News attribution has a link to the story."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        # Should have link to Kagi story
        link_facets = _facets_of_type(result['facets'], LINK_FACET)
        assert _has_link_to(link_facets, sample_story.link), (
            "Missing link to Kagi story in attribution"
        )

    def test_facets_do_not_overlap(self, sample_story):
        """Test that facets with same feature type don't overlap."""
        formatter = RichTextFormatter()
        result = formatter.format_full(sample_story)

        # Group facets by type
        facets_by_type = {}
        for facet in result['facets']:
            for feature in facet['features']:
                facets_by_type.setdefault(feature['$type'], []).append(facet)

        # Check every unordered pair within each type for overlap
        for ftype, facets in facets_by_type.items():
            for f1, f2 in combinations(facets, 2):
                start1, end1 = f1['index']['byteStart'], f1['index']['byteEnd']
                start2, end2 = f2['index']['byteStart'], f2['index']['byteEnd']

                # Half-open ranges overlap when each starts before the other ends
                overlaps = (start1 < end2 and start2 < end1)
                assert not overlaps, f"Overlapping facets of type {ftype}: {f1} and {f2}"