# A community-based topic aggregation platform built on atproto.
"""
Tests for Main Orchestration Script.

Tests the complete flow: fetch → parse → format → dedupe → post → update state.
"""
6import pytest
7from pathlib import Path
8from datetime import datetime
9from unittest.mock import Mock, MagicMock, patch, call
10import feedparser
11
12from src.main import Aggregator
13from src.models import KagiStory, AggregatorConfig, FeedConfig, Perspective, Quote, Source
14
15
@pytest.fixture
def mock_config():
    """Aggregator configuration with two enabled feeds and one disabled feed."""
    # (name, url, community_handle, enabled) for each configured feed.
    feed_specs = (
        ("World News", "https://news.kagi.com/world.xml", "world-news.coves.social", True),
        ("Tech News", "https://news.kagi.com/tech.xml", "tech.coves.social", True),
        ("Disabled Feed", "https://news.kagi.com/disabled.xml", "disabled.coves.social", False),
    )
    feeds = [
        FeedConfig(name=name, url=url, community_handle=handle, enabled=enabled)
        for name, url, handle, enabled in feed_specs
    ]
    return AggregatorConfig(
        coves_api_url="https://api.coves.social",
        feeds=feeds,
        log_level="info",
    )
43
44
@pytest.fixture
def sample_story():
    """Fully populated KagiStory whose link and guid share the same URL."""
    story_url = "https://kite.kagi.com/test/world/1"
    published = datetime(2024, 1, 15, 12, 0, 0)

    perspective = Perspective(
        actor="Test Actor",
        description="Test description",
        source_url="https://example.com/source",
    )
    quote = Quote(text="Test quote", attribution="Test Author")
    source = Source(title="Source 1", url="https://example.com/1", domain="example.com")

    return KagiStory(
        title="Test Story",
        link=story_url,
        guid=story_url,
        pub_date=published,
        categories=["World"],
        summary="Test summary",
        highlights=["Highlight 1", "Highlight 2"],
        perspectives=[perspective],
        quote=quote,
        sources=[source],
        image_url="https://example.com/image.jpg",
        image_alt="Test image",
    )
70
71
@pytest.fixture
def mock_rss_feed():
    """Well-formed (bozo == 0) mock RSS feed carrying two "World" entries."""
    # (title, url, publication hour on 2024-01-15, description) per entry.
    entry_rows = (
        ("Story 1", "https://kite.kagi.com/test/world/1", 12, "<p>Story 1 description</p>"),
        ("Story 2", "https://kite.kagi.com/test/world/2", 13, "<p>Story 2 description</p>"),
    )

    entries = []
    for title, url, hour, description in entry_rows:
        entries.append(
            MagicMock(
                title=title,
                link=url,  # link and guid are the same URL for every entry
                guid=url,
                published_parsed=(2024, 1, 15, hour, 0, 0, 0, 15, 0),
                tags=[MagicMock(term="World")],
                description=description,
            )
        )

    rss = MagicMock()
    rss.bozo = 0
    rss.entries = entries
    return rss
96
97
class TestAggregator:
    """Test suite for Aggregator orchestration.

    Collaborators are patched where ``src.main`` looks them up
    (``ConfigLoader``, ``RSSFetcher``, ``KagiHTMLParser``,
    ``RichTextFormatter``) and the Coves client is always a ``Mock``.
    The state file path always points inside pytest's ``tmp_path``.
    """

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: names do not start with
    # ``test``).
    # ------------------------------------------------------------------

    @staticmethod
    def _install(mock_cls, **method_results):
        """Make ``mock_cls()`` return a fresh Mock whose named methods return the given values."""
        instance = Mock()
        for method_name, result in method_results.items():
            getattr(instance, method_name).return_value = result
        mock_cls.return_value = instance
        return instance

    @classmethod
    def _install_pipeline(cls, mock_classes, config, feed, story):
        """Wire the patched loader, fetcher, parser and formatter classes with canned results.

        Args:
            mock_classes: ``(ConfigLoader, RSSFetcher, KagiHTMLParser,
                RichTextFormatter)`` patch objects, in that order.
            config: value returned by the loader's ``load()``.
            feed: value returned by the fetcher's ``fetch_feed()``.
            story: value returned by the parser's ``parse_to_story()``.

        Returns:
            The ``(fetcher, parser, formatter)`` mock instances so tests can
            inspect their call counts.
        """
        loader_cls, fetcher_cls, parser_cls, formatter_cls = mock_classes
        cls._install(loader_cls, load=config)
        fetcher = cls._install(fetcher_cls, fetch_feed=feed)
        parser = cls._install(parser_cls, parse_to_story=story)
        formatter = cls._install(
            formatter_cls,
            format_full={"content": "Test content", "facets": []},
        )
        return fetcher, parser, formatter

    @staticmethod
    def _make_aggregator(state_file, coves_client):
        """Build an Aggregator with the dummy config path shared by every test."""
        return Aggregator(
            config_path=Path("config.yaml"),
            state_file=state_file,
            coves_client=coves_client,
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_initialize_aggregator(self, mock_config, tmp_path):
        """Test aggregator initialization."""
        state_file = tmp_path / "state.json"

        with patch('src.main.ConfigLoader') as MockConfigLoader:
            self._install(MockConfigLoader, load=mock_config)

            aggregator = self._make_aggregator(state_file, Mock())

            assert aggregator.config == mock_config
            assert aggregator.state_file == state_file

    def test_process_enabled_feeds_only(self, mock_config, tmp_path):
        """Test that only enabled feeds are processed."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher:

            self._install(MockConfigLoader, load=mock_config)
            mock_fetcher = self._install(MockRSSFetcher)

            aggregator = self._make_aggregator(state_file, mock_client)

            # Mock empty feeds
            mock_fetcher.fetch_feed.return_value = MagicMock(bozo=0, entries=[])

            aggregator.run()

            # Should only fetch enabled feeds (2); the fixture's third feed
            # has enabled=False.
            assert mock_fetcher.fetch_feed.call_count == 2

    def test_full_successful_flow(self, mock_config, mock_rss_feed, sample_story, tmp_path):
        """Test complete flow: fetch → parse → format → post → update state."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()
        mock_client.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher, \
             patch('src.main.KagiHTMLParser') as MockHTMLParser, \
             patch('src.main.RichTextFormatter') as MockFormatter:

            mock_fetcher, mock_parser, mock_formatter = self._install_pipeline(
                (MockConfigLoader, MockRSSFetcher, MockHTMLParser, MockFormatter),
                mock_config,
                mock_rss_feed,
                sample_story,
            )

            # Run aggregator
            aggregator = self._make_aggregator(state_file, mock_client)
            aggregator.run()

            # Verify RSS fetching
            assert mock_fetcher.fetch_feed.call_count == 2

            # Verify parsing (2 entries per feed * 2 feeds = 4 total)
            assert mock_parser.parse_to_story.call_count == 4

            # Verify formatting
            assert mock_formatter.format_full.call_count == 4

            # Verify posting (should call create_post for each story)
            assert mock_client.create_post.call_count == 4

    def test_deduplication_skips_posted_stories(self, mock_config, mock_rss_feed, sample_story, tmp_path):
        """Test that already-posted stories are skipped."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()
        mock_client.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher, \
             patch('src.main.KagiHTMLParser') as MockHTMLParser, \
             patch('src.main.RichTextFormatter') as MockFormatter:

            self._install_pipeline(
                (MockConfigLoader, MockRSSFetcher, MockHTMLParser, MockFormatter),
                mock_config,
                mock_rss_feed,
                sample_story,
            )

            # First run: posts all stories
            aggregator = self._make_aggregator(state_file, mock_client)
            aggregator.run()

            # Verify first run posted stories
            first_run_posts = mock_client.create_post.call_count
            assert first_run_posts == 4

            # Second run: should skip all (already posted). reset_mock()
            # zeroes the call counters but keeps the configured return value.
            # A second Aggregator is built on the same state file.
            mock_client.reset_mock()
            aggregator2 = self._make_aggregator(state_file, mock_client)
            aggregator2.run()

            # Should not post any (all duplicates)
            assert mock_client.create_post.call_count == 0

    def test_continue_on_feed_error(self, mock_config, tmp_path):
        """Test that processing continues if one feed fails."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher:

            self._install(MockConfigLoader, load=mock_config)

            mock_fetcher = self._install(MockRSSFetcher)
            # First feed fails, second succeeds (with an empty feed)
            mock_fetcher.fetch_feed.side_effect = [
                Exception("Network error"),
                MagicMock(bozo=0, entries=[]),
            ]

            aggregator = self._make_aggregator(state_file, mock_client)

            # Should not raise exception
            aggregator.run()

            # Should have attempted both feeds
            assert mock_fetcher.fetch_feed.call_count == 2

    def test_handle_empty_feed(self, mock_config, tmp_path):
        """Test handling of empty RSS feeds."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher:

            self._install(MockConfigLoader, load=mock_config)
            self._install(MockRSSFetcher, fetch_feed=MagicMock(bozo=0, entries=[]))

            aggregator = self._make_aggregator(state_file, mock_client)
            aggregator.run()

            # Should not post anything
            assert mock_client.create_post.call_count == 0

    def test_dont_update_state_on_failed_post(self, mock_config, mock_rss_feed, sample_story, tmp_path):
        """Test that state is not updated if posting fails."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()
        mock_client.create_post.side_effect = Exception("Post failed")

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher, \
             patch('src.main.KagiHTMLParser') as MockHTMLParser, \
             patch('src.main.RichTextFormatter') as MockFormatter:

            self._install_pipeline(
                (MockConfigLoader, MockRSSFetcher, MockHTMLParser, MockFormatter),
                mock_config,
                mock_rss_feed,
                sample_story,
            )

            # Run aggregator (posts will fail)
            aggregator = self._make_aggregator(state_file, mock_client)
            aggregator.run()

            # Reset client to succeed. reset_mock() does not clear a
            # configured side_effect by default, and a side_effect takes
            # precedence over return_value, so the failure must be removed
            # explicitly or every post in the second run would still raise.
            mock_client.reset_mock()
            mock_client.create_post.side_effect = None
            mock_client.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

            # Second run: should try to post again (state wasn't updated)
            aggregator2 = self._make_aggregator(state_file, mock_client)
            aggregator2.run()

            # Should post stories (they weren't marked as posted)
            assert mock_client.create_post.call_count == 4

    def test_update_last_run_timestamp(self, mock_config, tmp_path):
        """Test that last_run timestamp is updated after successful processing."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher:

            self._install(MockConfigLoader, load=mock_config)
            self._install(MockRSSFetcher, fetch_feed=MagicMock(bozo=0, entries=[]))

            aggregator = self._make_aggregator(state_file, mock_client)
            aggregator.run()

            # Verify last_run was updated for both enabled feeds
            feed1_last_run = aggregator.state_manager.get_last_run(
                "https://news.kagi.com/world.xml"
            )
            feed2_last_run = aggregator.state_manager.get_last_run(
                "https://news.kagi.com/tech.xml"
            )

            assert feed1_last_run is not None
            assert feed2_last_run is not None

    def test_create_post_with_image_embed(self, mock_config, mock_rss_feed, sample_story, tmp_path):
        """Test that posts include external image embeds."""
        state_file = tmp_path / "state.json"
        mock_client = Mock()
        mock_client.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

        # Mock create_external_embed to return proper embed structure
        # Note: Thumbnails are handled by server's unfurl service, not client
        mock_client.create_external_embed.return_value = {
            "$type": "social.coves.embed.external",
            "external": {
                "uri": sample_story.link,
                "title": sample_story.title,
                "description": sample_story.summary,
            },
        }

        with patch('src.main.ConfigLoader') as MockConfigLoader, \
             patch('src.main.RSSFetcher') as MockRSSFetcher, \
             patch('src.main.KagiHTMLParser') as MockHTMLParser, \
             patch('src.main.RichTextFormatter') as MockFormatter:

            # Only one entry for simplicity
            single_entry_feed = MagicMock(bozo=0, entries=[mock_rss_feed.entries[0]])
            self._install_pipeline(
                (MockConfigLoader, MockRSSFetcher, MockHTMLParser, MockFormatter),
                mock_config,
                single_entry_feed,
                sample_story,
            )

            # Run aggregator
            aggregator = self._make_aggregator(state_file, mock_client)
            aggregator.run()

            # Verify create_post was called with embed (inspects the most
            # recent call's keyword arguments)
            mock_client.create_post.assert_called()
            call_kwargs = mock_client.create_post.call_args.kwargs

            assert "embed" in call_kwargs
            assert call_kwargs["embed"]["$type"] == "social.coves.embed.external"
            assert call_kwargs["embed"]["external"]["uri"] == sample_story.link
            assert call_kwargs["embed"]["external"]["title"] == sample_story.title
            # Thumbnail is not included - server's unfurl service handles it
            assert "thumb" not in call_kwargs["embed"]["external"]