commit 36063cadfc8151e71631398d0b53fe31c2b086c5 · bretton.dev/coves

aggregators/kagi-news/tests/__init__.py

···

       1
       1
       +
       """Test suite for Kagi News aggregator."""

+12

aggregators/kagi-news/tests/fixtures/sample_rss_item.xml

···

       1
       1
       +
       <?xml version='1.0' encoding='UTF-8'?>

     

       2
       2
       +
       <!-- Sample RSS item from Kagi News - includes quote, highlights, perspectives, sources -->

     

       3
       3
       +
       <item>

     

       4
       4
       +
         <title>Trump to meet Xi in South Korea on Oct 30</title>

     

       5
       5
       +
         <link>https://kite.kagi.com/96cf948f-8a1b-4281-9ba4-8a9e1ad7b3c6/world/10</link>

     

       6
       6
       +
         <description>&lt;p&gt;The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30, at the end of an Asia trip that includes Malaysia and Japan . The administration said the meeting will take place Thursday morning local time, and Mr Trump indicated his first question to Xi would concern fentanyl and other bilateral issues . The talks come amid heightened trade tensions after Beijing expanded export curbs on rare-earth minerals and following Mr Trump's recent threat of additional tariffs on Chinese goods, making the meeting a focal point for discussions on trade, technology supply chains and energy .&lt;/p&gt;&lt;img src='https://kagiproxy.com/img/Q2SRXQtwTYBIiQeI0FG-X6taF_wHSJaXDiFUzju2kbCWGuOYIFUX--8L0BqE4VKxpbOJY3ylFPJkDpfSnyQYZ1qdOLXbphHTnsOK4jb7gqC4KCn5nf3ANbWCuaFD5ZUSijiK0k7wOLP2fyX6tynu2mPtXlCbotLo2lTrEswZl4-No2AI4mI4lkResfnRdp-YjpoEfCOHkNfbN1-0cNcHt9T2dmgBSXrQ2w' alt='News image associated with coverage of President Trump&amp;#x27;s Asia trip and planned meeting with President Xi' /&gt;&lt;br /&gt;&lt;h3&gt;Highlights:&lt;/h3&gt;&lt;ul&gt;&lt;li&gt;Itinerary details: The Asia swing begins in Malaysia, continues to Japan and ends with the bilateral meeting in South Korea on Thursday morning local time, White House press secretary Karoline Leavitt said at a briefing .&lt;/li&gt;&lt;li&gt;APEC context: US officials indicated the leaders will meet on the sidelines of the Asia-Pacific Economic Cooperation gathering, shaping expectations for short, high-level talks rather than a lengthy summit .&lt;/li&gt;&lt;li&gt;Tariff escalation: President Trump recently threatened an additional 100% tariff on Chinese goods starting in November, a step he has described as unsustainable but that has heightened urgency for talks .&lt;/li&gt;&lt;li&gt;Rare-earth impact: Beijing's expanded curbs on rare-earth exports have exposed supply vulnerabilities because US high-tech firms rely heavily on 
those materials, raising strategic and economic stakes for the meeting .&lt;/li&gt;&lt;/ul&gt;&lt;blockquote&gt;Work out a lot of our doubts and questions - President Trump&lt;/blockquote&gt;&lt;h3&gt;Perspectives:&lt;/h3&gt;&lt;ul&gt;&lt;li&gt;President Trump: He said his first question to President Xi would be about fentanyl and indicated he hoped to resolve bilateral doubts and questions in the talks. (&lt;a href='https://www.straitstimes.com/world/united-states/trump-to-meet-xi-in-south-korea-on-oct-30-as-part-of-asia-swing'&gt;The Straits Times&lt;/a&gt;)&lt;/li&gt;&lt;li&gt;White House (press secretary): Karoline Leavitt confirmed the bilateral meeting will occur Thursday morning local time during a White House briefing. (&lt;a href='https://www.scmp.com/news/us/diplomacy/article/3330131/donald-trump-meet-chinas-xi-jinping-next-thursday-south-korea-crunch-talks'&gt;South China Morning Post&lt;/a&gt;)&lt;/li&gt;&lt;li&gt;Beijing/Chinese authorities: Officials have defended tighter export controls on rare-earths, a move described in reporting as not explicitly targeting the US though it has raised tensions. 
(&lt;a href='https://www.rt.com/news/626890-white-house-announces-trump-xi-meeting/'&gt;RT&lt;/a&gt;)&lt;/li&gt;&lt;/ul&gt;&lt;h3&gt;Sources:&lt;/h3&gt;&lt;ul&gt;&lt;li&gt;&lt;a href='https://www.straitstimes.com/world/united-states/trump-to-meet-xi-in-south-korea-on-oct-30-as-part-of-asia-swing'&gt;Trump to meet Xi in South Korea on Oct 30 as part of Asia swing&lt;/a&gt; - straitstimes.com&lt;/li&gt;&lt;li&gt;&lt;a href='https://www.scmp.com/news/us/diplomacy/article/3330131/donald-trump-meet-chinas-xi-jinping-next-thursday-south-korea-crunch-talks'&gt;Trump to meet Xi in South Korea next Thursday as part of key Asia trip&lt;/a&gt; - scmp.com&lt;/li&gt;&lt;li&gt;&lt;a href='https://www.rt.com/news/626890-white-house-announces-trump-xi-meeting/'&gt;White House announces Trump-Xi meeting&lt;/a&gt; - rt.com&lt;/li&gt;&lt;li&gt;&lt;a href='https://www.thehindu.com/news/international/trump-to-meet-xi-in-south-korea-as-part-of-asia-swing/article70195667.ece'&gt;Trump to meet Xi in South Korea as part of Asia swing&lt;/a&gt; - thehindu.com&lt;/li&gt;&lt;li&gt;&lt;a href='https://www.aljazeera.com/news/2025/10/24/white-house-confirms-trump-to-meet-xi-in-south-korea-as-part-of-asia-tour'&gt;White House confirms Trump to meet Xi in South Korea as part of Asia tour&lt;/a&gt; - aljazeera.com&lt;/li&gt;&lt;/ul&gt;</description>

     

       7
       7
       +
         <guid isPermaLink="true">https://kite.kagi.com/96cf948f-8a1b-4281-9ba4-8a9e1ad7b3c6/world/10</guid>

     

       8
       8
       +
         <category>World</category>

     

       9
       9
       +
         <category>World/Diplomacy</category>

     

       10
       10
       +
         <category>Diplomacy</category>

     

       11
       11
       +
         <pubDate>Thu, 23 Oct 2025 20:56:00 +0000</pubDate>

     

       12
       12
       +
       </item>

+246

aggregators/kagi-news/tests/test_config.py

···

       1
       1
       +
       """

     

       2
       2
       +
       Tests for Configuration Loader.

     

       3
       3
       +
       

     

       4
       4
       +
       Tests loading and validating aggregator configuration.

     

       5
       5
       +
       """

     

       6
       6
       +
       import pytest

     

       7
       7
       +
       import tempfile

     

       8
       8
       +
       from pathlib import Path

     

       9
       9
       +
       

     

       10
       10
       +
       from src.config import ConfigLoader, ConfigError

     

       11
       11
       +
       from src.models import AggregatorConfig, FeedConfig

     

       12
       12
       +
       

     

       13
       13
       +
       

     

       14
       14
       +
       @pytest.fixture
       def valid_config_yaml():
           """Return a well-formed aggregator configuration as YAML text.

           The document declares ``coves_api_url``, three feeds (two enabled,
           one disabled) and an explicit ``log_level``.
           """
           yaml_text = """
       coves_api_url: "https://api.coves.social"

       feeds:
         - name: "World News"
           url: "https://news.kagi.com/world.xml"
           community_handle: "world-news.coves.social"
           enabled: true

         - name: "Tech News"
           url: "https://news.kagi.com/tech.xml"
           community_handle: "tech.coves.social"
           enabled: true

         - name: "Science News"
           url: "https://news.kagi.com/science.xml"
           community_handle: "science.coves.social"
           enabled: false

       log_level: "info"
       """
           return yaml_text

     

       38
       38
       +
       

     

       39
       39
       +
       

     

       40
       40
       +
       @pytest.fixture
       def temp_config_file(valid_config_yaml, tmp_path):
           """Write the valid YAML config to a per-test file and return its path.

           The file lives under pytest's ``tmp_path`` directory, so pytest owns
           its lifetime and nothing is left behind if writing fails part-way.

           Args:
               valid_config_yaml: YAML text supplied by the fixture of that name.
               tmp_path: pytest's built-in per-test temporary directory.

           Returns:
               ``pathlib.Path`` of the written ``.yaml`` file.
           """
           config_path = tmp_path / "config.yaml"
           # Explicit encoding keeps the file content independent of the platform default.
           config_path.write_text(valid_config_yaml, encoding="utf-8")
           return config_path

     

       50
       50
       +
       

     

       51
       51
       +
       

     

       52
       52
       +
       class TestConfigLoader:
           """Test suite for ConfigLoader.

           Every test works on files under pytest's ``tmp_path`` so no temporary
           files are leaked, and the ``COVES_API_URL`` environment override is
           cleared before each test so results do not depend on the caller's shell.
           """

           @pytest.fixture(autouse=True)
           def _clear_env_override(self, monkeypatch):
               """Remove ``COVES_API_URL`` so file values are what the loader sees.

               ``test_load_from_env_override`` shows this variable takes precedence
               over the config file; without clearing it, tests that assert the
               file's value would fail whenever it is set in the environment.
               """
               monkeypatch.delenv("COVES_API_URL", raising=False)

           @staticmethod
           def _write_config(directory, content):
               """Write *content* to ``config.yaml`` inside *directory*; return the path."""
               config_path = directory / "config.yaml"
               config_path.write_text(content, encoding="utf-8")
               return config_path

           def test_load_valid_config(self, temp_config_file):
               """Test loading valid configuration."""
               loader = ConfigLoader(temp_config_file)
               config = loader.load()

               assert isinstance(config, AggregatorConfig)
               assert config.coves_api_url == "https://api.coves.social"
               assert config.log_level == "info"
               assert len(config.feeds) == 3

           def test_parse_feed_configs(self, temp_config_file):
               """Test parsing feed configurations."""
               loader = ConfigLoader(temp_config_file)
               config = loader.load()

               # Check first feed
               feed1 = config.feeds[0]
               assert isinstance(feed1, FeedConfig)
               assert feed1.name == "World News"
               assert feed1.url == "https://news.kagi.com/world.xml"
               assert feed1.community_handle == "world-news.coves.social"
               assert feed1.enabled is True

               # Check disabled feed
               feed3 = config.feeds[2]
               assert feed3.name == "Science News"
               assert feed3.enabled is False

           def test_get_enabled_feeds_only(self, temp_config_file):
               """Test getting only enabled feeds."""
               loader = ConfigLoader(temp_config_file)
               config = loader.load()

               enabled_feeds = [f for f in config.feeds if f.enabled]
               assert len(enabled_feeds) == 2
               assert all(f.enabled for f in enabled_feeds)

           def test_missing_config_file_raises_error(self, tmp_path):
               """Test that missing config file raises error."""
               # A path inside the fresh tmp_path is guaranteed not to exist,
               # unlike a name resolved against the current working directory.
               missing_path = tmp_path / "nonexistent.yaml"
               with pytest.raises(ConfigError, match="not found"):
                   loader = ConfigLoader(missing_path)
                   loader.load()

           def test_invalid_yaml_raises_error(self, tmp_path):
               """Test that invalid YAML raises error."""
               config_path = self._write_config(tmp_path, "invalid: yaml: content: [[[")

               with pytest.raises(ConfigError, match="Failed to parse"):
                   loader = ConfigLoader(config_path)
                   loader.load()

           def test_missing_required_field_raises_error(self, tmp_path):
               """Test that missing required fields raise error."""
               invalid_yaml = """
       feeds:
         - name: "Test"
           url: "https://test.xml"
           # Missing community_handle!
       """
               config_path = self._write_config(tmp_path, invalid_yaml)

               with pytest.raises(ConfigError, match="Missing required field"):
                   loader = ConfigLoader(config_path)
                   loader.load()

           def test_missing_coves_api_url_raises_error(self, tmp_path):
               """Test that missing coves_api_url raises error."""
               invalid_yaml = """
       feeds:
         - name: "Test"
           url: "https://test.xml"
           community_handle: "test.coves.social"
       """
               config_path = self._write_config(tmp_path, invalid_yaml)

               with pytest.raises(ConfigError, match="coves_api_url"):
                   loader = ConfigLoader(config_path)
                   loader.load()

           def test_default_log_level(self, tmp_path):
               """Test that log_level defaults to 'info' if not specified."""
               minimal_yaml = """
       coves_api_url: "https://api.coves.social"
       feeds:
         - name: "Test"
           url: "https://test.xml"
           community_handle: "test.coves.social"
       """
               config_path = self._write_config(tmp_path, minimal_yaml)

               config = ConfigLoader(config_path).load()
               assert config.log_level == "info"

           def test_default_enabled_true(self, tmp_path):
               """Test that feed enabled defaults to True if not specified."""
               yaml_content = """
       coves_api_url: "https://api.coves.social"
       feeds:
         - name: "Test"
           url: "https://test.xml"
           community_handle: "test.coves.social"
           # No 'enabled' field
       """
               config_path = self._write_config(tmp_path, yaml_content)

               config = ConfigLoader(config_path).load()
               assert config.feeds[0].enabled is True

           def test_invalid_url_format_raises_error(self, tmp_path):
               """Test that invalid URLs raise error."""
               invalid_yaml = """
       coves_api_url: "https://api.coves.social"
       feeds:
         - name: "Test"
           url: "not-a-valid-url"
           community_handle: "test.coves.social"
       """
               config_path = self._write_config(tmp_path, invalid_yaml)

               with pytest.raises(ConfigError, match="Invalid URL"):
                   loader = ConfigLoader(config_path)
                   loader.load()

           def test_empty_feeds_list_raises_error(self, tmp_path):
               """Test that empty feeds list raises error."""
               invalid_yaml = """
       coves_api_url: "https://api.coves.social"
       feeds: []
       """
               config_path = self._write_config(tmp_path, invalid_yaml)

               with pytest.raises(ConfigError, match="at least one feed"):
                   loader = ConfigLoader(config_path)
                   loader.load()

           def test_load_from_env_override(self, temp_config_file, monkeypatch):
               """Test that environment variables can override config values."""
               # Set environment variable (after the autouse fixture cleared it)
               monkeypatch.setenv("COVES_API_URL", "https://test.coves.social")

               loader = ConfigLoader(temp_config_file)
               config = loader.load()

               # Should use env var instead of config file
               assert config.coves_api_url == "https://test.coves.social"

           def test_get_feed_by_url(self, temp_config_file):
               """Test helper to get feed config by URL."""
               loader = ConfigLoader(temp_config_file)
               config = loader.load()

               feed = next((f for f in config.feeds if f.url == "https://news.kagi.com/tech.xml"), None)
               assert feed is not None
               assert feed.name == "Tech News"
               assert feed.community_handle == "tech.coves.social"

+433

aggregators/kagi-news/tests/test_e2e.py

···

       1
       1
       +
       """

     

       2
       2
       +
       End-to-End Integration Tests.

     

       3
       3
       +
       

     

       4
       4
       +
       Tests the complete aggregator workflow against live infrastructure:

     

       5
       5
       +
       - Real HTTP mocking (Kagi RSS)

     

       6
       6
       +
       - Real PDS (Coves test PDS via Docker)

     

       7
       7
       +
       - Real community posting

     

       8
       8
       +
       - Real state management

     

       9
       9
       +
       

     

       10
       10
       +
       Requires:

     

       11
       11
       +
       - Coves test PDS running on localhost:3001

     

       12
       12
       +
       - Test database with community: e2e-95206.community.coves.social

     

       13
       13
       +
       - Aggregator account: kagi-news.local.coves.dev

     

       14
       14
       +
       """

     

       15
       15
       +
       import os

     

       16
       16
       +
       import pytest

     

       17
       17
       +
       import responses

     

       18
       18
       +
       from pathlib import Path

     

       19
       19
       +
       from datetime import datetime

     

       20
       20
       +
       

     

       21
       21
       +
       from src.main import Aggregator

     

       22
       22
       +
       from src.coves_client import CovesClient

     

       23
       23
       +
       from src.config import ConfigLoader

     

       24
       24
       +
       

     

       25
       25
       +
       

     

       26
       26
       +
       # Skip E2E tests by default (require live infrastructure)

     

       27
       27
       +
       # Module-wide marker: every test here is skipped unless RUN_E2E_TESTS is exactly "1".
       pytestmark = pytest.mark.skipif(
           os.environ.get('RUN_E2E_TESTS') != '1',
           reason="E2E tests require RUN_E2E_TESTS=1 and live PDS",
       )

     

       31
       31
       +
       

     

       32
       32
       +
       

     

       33
       33
       +
       @pytest.fixture
       def test_community(aggregator_credentials):
           """Create a test community for E2E testing.

           Logs in at ``localhost:3001`` with the aggregator credentials, then
           creates a public community through the API at ``localhost:8081``.

           Args:
               aggregator_credentials: ``(handle, password)`` tuple from the
                   fixture of that name.

           Returns:
               The new community's handle, ``<name>.community.coves.social``.

           Raises:
               requests.HTTPError: If authentication is rejected.
               RuntimeError: If the community could not be created.
           """
           import time
           import requests

           handle, password = aggregator_credentials
           # Without a timeout, a stopped local service would hang the test run.
           request_timeout = 10

           # Authenticate
           auth_response = requests.post(
               "http://localhost:3001/xrpc/com.atproto.server.createSession",
               json={"identifier": handle, "password": password},
               timeout=request_timeout,
           )
           # Surface a bad login as an HTTP error rather than a KeyError on "accessJwt".
           auth_response.raise_for_status()
           token = auth_response.json()["accessJwt"]

           # Create community (use short name to avoid handle length limits).
           # The suffix is the Unix time modulo 10000, so it has at most four
           # digits and repeats every 10000 seconds.
           community_name = f"e2e-{int(time.time()) % 10000}"
           create_response = requests.post(
               "http://localhost:8081/xrpc/social.coves.community.create",
               headers={"Authorization": f"Bearer {token}"},
               json={
                   "name": community_name,
                   "displayName": "E2E Test Community",
                   "description": "Temporary community for aggregator E2E testing",
                   "visibility": "public",
               },
               timeout=request_timeout,
           )

           if not create_response.ok:
               raise RuntimeError(f"Failed to create community: {create_response.text}")

           community_handle = f"{community_name}.community.coves.social"
           print(f"\n✅ Created test community: {community_handle}")
           return community_handle

     

       68
       68
       +
       

     

       69
       69
       +
       

     

       70
       70
       +
       @pytest.fixture
       def test_config_file(tmp_path, test_community):
           """Write a config pointing one feed at the dynamically created community.

           Returns the path of ``config.yaml`` inside pytest's ``tmp_path``.
           """
           target = tmp_path / "config.yaml"
           target.write_text(f"""
       coves_api_url: http://localhost:8081

       feeds:
         - name: "Kagi World News"
           url: "https://news.kagi.com/world.xml"
           community_handle: "{test_community}"
           enabled: true

       log_level: debug
       """)
           return target

     

       87
       87
       +
       

     

       88
       88
       +
       

     

       89
       89
       +
       @pytest.fixture
       def test_state_file(tmp_path):
           """Return the path for a state file inside ``tmp_path``.

           Only the path is built here; the file itself is not created.
           """
           state_path = tmp_path.joinpath("state.json")
           return state_path

     

       93
       93
       +
       

     

       94
       94
       +
       

     

       95
       95
       +
       @pytest.fixture
       def mock_kagi_feed():
           """Load the Kagi RSS feed fixture and return its text.

           Looks for ``world.xml`` in the sibling ``data`` directory first and
           falls back to ``tests/fixtures`` when it is not there.

           Returns:
               The feed XML as a string.

           Raises:
               FileNotFoundError: If the file exists in neither location.
           """
           tests_dir = Path(__file__).parent

           # Load from data directory (where actual feed is stored)
           fixture_path = tests_dir.parent / "data" / "world.xml"
           if not fixture_path.exists():
               # Fallback to tests/fixtures if moved
               fixture_path = tests_dir / "fixtures" / "world.xml"

           # Decode explicitly instead of using the platform default encoding.
           # NOTE(review): assumes the feed is UTF-8 like sample_rss_item.xml — confirm.
           return fixture_path.read_text(encoding="utf-8")

     

       104
       104
       +
       

     

       105
       105
       +
       

     

       106
       106
       +
       @pytest.fixture
       def aggregator_credentials():
           """Return the aggregator's ``(handle, password)`` pair.

           Each value is taken from the environment (``AGGREGATOR_HANDLE`` /
           ``AGGREGATOR_PASSWORD``) when set, otherwise from the default below.
           """
           # NOTE(review): the fallback password is committed in source; presumably
           # it is only valid on a local development PDS - confirm.
           env_defaults = (
               ('AGGREGATOR_HANDLE', 'kagi-news.local.coves.dev'),
               ('AGGREGATOR_PASSWORD', 'kagi-aggregator-2024-secure-pass'),
           )
           handle, password = (
               os.getenv(variable, fallback) for variable, fallback in env_defaults
           )
           return handle, password

     

       112
       112
       +
       

     

       113
       113
       +
       

     

       114
       114
       +
       class TestEndToEnd:
           """Full end-to-end integration tests.

           The Kagi feed URL is always answered by the ``responses`` mock. Requests
           to ``http://localhost`` are passed through in the first two tests, and the
           authentication test talks to localhost directly, so those tests presumably
           need the local AppView/PDS stack running.
           """

           # Must match the feed ``url`` written by the ``test_config_file`` fixture.
           FEED_URL = "https://news.kagi.com/world.xml"
           # AppView endpoint handed to CovesClient as ``api_url``.
           APPVIEW_URL = "http://localhost:8081"
           # PDS endpoint used for authentication.
           PDS_URL = "http://localhost:3001"

           @staticmethod
           def _banner(message):
               """Print a framed section header (visible with ``pytest -s``)."""
               print("\n" + "=" * 60)
               print(message)
               print("=" * 60)

           @staticmethod
           def _export_credentials(credentials, pds_url=None):
               """Publish the aggregator credentials through environment variables.

               Args:
                   credentials: ``(handle, password)`` tuple from the
                       ``aggregator_credentials`` fixture.
                   pds_url: when given, also exported as ``PDS_URL``.
               """
               # NOTE(review): these assignments outlive the test that makes them.
               # Switching to pytest's ``monkeypatch.setenv`` would isolate tests,
               # but later tests may currently rely on the leaked ``PDS_URL``.
               handle, password = credentials
               os.environ['AGGREGATOR_HANDLE'] = handle
               os.environ['AGGREGATOR_PASSWORD'] = password
               if pds_url is not None:
                   os.environ['PDS_URL'] = pds_url

           @responses.activate
           def test_full_aggregator_workflow(
               self,
               test_config_file,
               test_state_file,
               mock_kagi_feed,
               aggregator_credentials
           ):
               """
               Run the aggregator twice against the saved feed and check deduplication.

               Steps:
               1. Serve the saved Kagi feed from the mocked feed URL
               2. Pass localhost traffic through to the real services
               3. First run: the state manager must report at least one posted story
               4. Second run (fresh Aggregator, same state file): the posted count
                  must be unchanged

               Only the state manager's count is asserted; the created posts
               themselves are not fetched back.
               """
               responses.add(
                   responses.GET,
                   self.FEED_URL,
                   body=mock_kagi_feed,
                   status=200,
                   content_type="application/xml",
               )
               # Everything on localhost bypasses the mock.
               responses.add_passthru("http://localhost")
               self._export_credentials(aggregator_credentials, pds_url=self.PDS_URL)

               first_pass = Aggregator(
                   config_path=test_config_file,
                   state_file=test_state_file,
               )
               self._banner("🚀 Running first aggregator pass (should post stories)")
               first_pass.run()

               posted_count = first_pass.state_manager.get_posted_count(self.FEED_URL)
               print(f"\n✅ First pass: {posted_count} stories posted and tracked")
               assert posted_count > 0, "Should have posted at least one story"

               # A new instance on the same state file simulates a CRON re-run.
               second_pass = Aggregator(
                   config_path=test_config_file,
                   state_file=test_state_file,
               )
               self._banner("🔄 Running second aggregator pass (should skip duplicates)")
               second_pass.run()

               posted_count2 = second_pass.state_manager.get_posted_count(self.FEED_URL)
               print(f"\n✅ Second pass: Still {posted_count2} stories (duplicates skipped)")
               assert posted_count2 == posted_count, "Should not post duplicates"

           @responses.activate
           def test_post_with_external_embed(
               self,
               test_config_file,
               test_state_file,
               mock_kagi_feed,
               aggregator_credentials
           ):
               """
               Run the aggregator once on the saved feed and check stories were posted.

               NOTE(review): despite the name, the only assertion is that the posted
               count is positive; embed, thumbnail, title and description are not
               inspected here.
               """
               responses.add(
                   responses.GET,
                   self.FEED_URL,
                   body=mock_kagi_feed,
                   status=200,
               )
               # Everything on localhost bypasses the mock.
               responses.add_passthru("http://localhost")
               self._export_credentials(aggregator_credentials, pds_url=self.PDS_URL)

               aggregator = Aggregator(
                   config_path=test_config_file,
                   state_file=test_state_file,
               )
               self._banner("🖼️  Testing external embed creation")
               aggregator.run()

               posted_count = aggregator.state_manager.get_posted_count(self.FEED_URL)
               print(f"\n✅ Posted {posted_count} stories with external embeds")
               assert posted_count > 0

           def test_authentication_with_live_pds(self, aggregator_credentials):
               """
               Authenticate a CovesClient against the PDS on localhost.

               Verifies:
               - The client reports itself as authenticated
               - The client exposes a ``did`` attribute
               - The DID starts with ``did:plc:``
               """
               handle, password = aggregator_credentials
               self._banner(f"🔐 Testing authentication: {handle}")

               client = CovesClient(
                   api_url=self.APPVIEW_URL,  # AppView for posting
                   handle=handle,
                   password=password,
                   pds_url=self.PDS_URL,  # PDS for auth
               )
               client.authenticate()

               print("\n✅ Authentication successful")
               print(f"   Handle: {client.handle}")
               print(f"   Authenticated: {client._authenticated}")

               assert client._authenticated is True
               assert hasattr(client, 'did')
               assert client.did.startswith("did:plc:")

           @responses.activate
           def test_state_persistence_across_runs(
               self,
               test_config_file,
               test_state_file,
               aggregator_credentials
           ):
               """
               Check that aggregator state survives a restart.

               Verifies:
               - The state file exists after the first run
               - A last-run value is recorded for the feed
               - A second Aggregator on the same state file reports a last-run
                 value that is not earlier than the first

               The mock is installed with ``@responses.activate`` so it is torn
               down even when an assertion fails.
               """
               # A channel without items keeps the run from posting anything.
               responses.add(
                   responses.GET,
                   self.FEED_URL,
                   body='<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>',
                   status=200,
               )
               self._export_credentials(aggregator_credentials)
               self._banner("💾 Testing state persistence")

               # First run
               aggregator1 = Aggregator(
                   config_path=test_config_file,
                   state_file=test_state_file,
               )
               aggregator1.run()

               assert test_state_file.exists(), "State file should be created"
               print(f"\n✅ State file created: {test_state_file}")

               last_run1 = aggregator1.state_manager.get_last_run(self.FEED_URL)
               assert last_run1 is not None, "Last run should be recorded"
               print(f"   Last run: {last_run1}")

               # Second run (new instance, same state file)
               aggregator2 = Aggregator(
                   config_path=test_config_file,
                   state_file=test_state_file,
               )
               aggregator2.run()

               last_run2 = aggregator2.state_manager.get_last_run(self.FEED_URL)
               # Fail with a readable message instead of a TypeError on ``>=``.
               assert last_run2 is not None, "Last run should survive a restart"
               assert last_run2 >= last_run1, "Last run should be updated"
               print(f"   Last run (after restart): {last_run2}")
               print("\n✅ State persisted across aggregator restarts")

           @responses.activate
           def test_error_recovery(
               self,
               test_config_file,
               test_state_file,
               aggregator_credentials
           ):
               """
               Check that a failing feed does not crash the aggregator.

               The feed URL answers with HTTP 500; the test fails if
               ``Aggregator.run()`` lets any ``Exception`` escape. Logging is not
               inspected.

               The mock is installed with ``@responses.activate`` so it is torn
               down even when the test fails.
               """
               responses.add(
                   responses.GET,
                   self.FEED_URL,
                   body="Internal Server Error",
                   status=500,
               )
               self._export_credentials(aggregator_credentials)
               self._banner("🛡️  Testing error recovery")

               aggregator = Aggregator(
                   config_path=test_config_file,
                   state_file=test_state_file,
               )

               # The broad catch is deliberate: any escaping error is the failure
               # this test exists to report.
               try:
                   aggregator.run()
               except Exception as e:
                   pytest.fail(f"Aggregator should handle errors gracefully: {e}")
               else:
                   print("\n✅ Aggregator handled feed error gracefully")

     

       392
       392
       +
       

     

       393
       393
       +
       

     

       394
       394
       +
       def test_coves_client_external_embed_format(aggregator_credentials):
           """
           Check the dict built by ``CovesClient.create_external_embed``.

           Verifies:
           - ``$type`` is ``social.coves.embed.external``
           - ``uri``, ``title``, ``description`` and ``thumb`` are echoed under
             ``external`` when a thumbnail is supplied
           - ``thumb`` is absent from ``external`` when no thumbnail is supplied
           """
           handle, password = aggregator_credentials
           client = CovesClient(
               api_url="http://localhost:8081",
               handle=handle,
               password=password,
           )

           # Embed built with a thumbnail.
           embed = client.create_external_embed(
               uri="https://example.com/story",
               title="Test Story",
               description="Test description",
               thumb="https://example.com/image.jpg",
           )
           assert embed["$type"] == "social.coves.embed.external"

           expected_fields = (
               ("uri", "https://example.com/story"),
               ("title", "Test Story"),
               ("description", "Test description"),
               ("thumb", "https://example.com/image.jpg"),
           )
           for field, expected in expected_fields:
               assert embed["external"][field] == expected

           # Embed built without a thumbnail.
           embed_no_thumb = client.create_external_embed(
               uri="https://example.com/story2",
               title="Test Story 2",
               description="Test description 2",
           )
           assert "thumb" not in embed_no_thumb["external"]

           print("\n✅ External embed format correct")

+122

aggregators/kagi-news/tests/test_html_parser.py

···

       1
       1
       +
       """

     

       2
       2
       +
       Tests for Kagi HTML description parser.

     

       3
       3
       +
       """

     

       4
       4
       +
       import pytest

     

       5
       5
       +
       from pathlib import Path

     

       6
       6
       +
       from datetime import datetime

     

       7
       7
       +
       import html

     

       8
       8
       +
       

     

       9
       9
       +
       from src.html_parser import KagiHTMLParser

     

       10
       10
       +
       from src.models import KagiStory, Perspective, Quote, Source

     

       11
       11
       +
       

     

       12
       12
       +
       

     

       13
       13
       +
       @pytest.fixture
       def sample_html_description():
           """Return unescaped HTML shaped like a Kagi RSS item description.

           The markup contains, in order: a summary paragraph, an image, a
           "Highlights" list, a blockquote, a "Perspectives" list and a "Sources"
           list.
           """
           # Adjacent literals are joined by the parser into one string. The only
           # line break in the value is the explicit newline escape that ends the
           # first "Perspectives" segment.
           return (
               "<p>The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30, at the end of an Asia trip that includes Malaysia and Japan . The administration said the meeting will take place Thursday morning local time, and Mr Trump indicated his first question to Xi would concern fentanyl and other bilateral issues . The talks come amid heightened trade tensions after Beijing expanded export curbs on rare-earth minerals and following Mr Trump's recent threat of additional tariffs on Chinese goods, making the meeting a focal point for discussions on trade, technology supply chains and energy .</p>"
               "<img src='https://kagiproxy.com/img/Q2SRXQtwTYBIiQeI0FG-X6taF_wHSJaXDiFUzju2kbCWGuOYIFUX--8L0BqE4VKxpbOJY3ylFPJkDpfSnyQYZ1qdOLXbphHTnsOK4jb7gqC4KCn5nf3ANbWCuaFD5ZUSijiK0k7wOLP2fyX6tynu2mPtXlCbotLo2lTrEswZl4-No2AI4mI4lkResfnRdp-YjpoEfCOHkNfbN1-0cNcHt9T2dmgBSXrQ2w' alt='News image associated with coverage of President Trump&#x27;s Asia trip and planned meeting with President Xi' />"
               "<br /><h3>Highlights:</h3><ul>"
               "<li>Itinerary details: The Asia swing begins in Malaysia, continues to Japan and ends with the bilateral meeting in South Korea on Thursday morning local time, White House press secretary Karoline Leavitt said at a briefing .</li>"
               "<li>APEC context: US officials indicated the leaders will meet on the sidelines of the Asia-Pacific Economic Cooperation gathering, shaping expectations for short, high-level talks rather than a lengthy summit .</li></ul>"
               "<blockquote>Work out a lot of our doubts and questions - President Trump</blockquote>"
               "<h3>Perspectives:</h3><ul>"
               "<li>President Trump: He said his first question to President Xi would be about fentanyl and indicated he hoped to resolve bilateral doubts and questions in the talks. \n"
               "(<a href='https://www.straitstimes.com/world/united-states/trump-to-meet-xi-in-south-korea-on-oct-30-as-part-of-asia-swing'>The Straits Times</a>)</li>"
               "<li>White House (press secretary): Karoline Leavitt confirmed the bilateral meeting will occur Thursday morning local time during a White House briefing. (<a href='https://www.scmp.com/news/us/diplomacy/article/3330131/donald-trump-meet-chinas-xi-jinping-next-thursday-south-korea-crunch-talks'>South China Morning Post</a>)</li></ul>"
               "<h3>Sources:</h3><ul>"
               "<li><a href='https://www.straitstimes.com/world/united-states/trump-to-meet-xi-in-south-korea-on-oct-30-as-part-of-asia-swing'>Trump to meet Xi in South Korea on Oct 30 as part of Asia swing</a> - straitstimes.com</li>"
               "<li><a href='https://www.scmp.com/news/us/diplomacy/article/3330131/donald-trump-meet-chinas-xi-jinping-next-thursday-south-korea-crunch-talks'>Trump to meet Xi in South Korea next Thursday as part of key Asia trip</a> - scmp.com</li></ul>"
           )

     

       19
       19
       +
       

     

       20
       20
       +
       

     

       21
       21
       +
       class TestKagiHTMLParser:
           """Checks KagiHTMLParser output against the sample HTML description fixture."""

           def test_parse_summary(self, sample_html_description):
               """The parsed summary opens with, and contains, the expected phrases."""
               parsed = KagiHTMLParser().parse(sample_html_description)
               summary = parsed["summary"]

               assert summary.startswith("The White House confirmed President Trump")
               assert "bilateral meeting with Chinese President Xi Jinping" in summary

           def test_parse_image_url(self, sample_html_description):
               """Both the image URL and its alt text are extracted."""
               parsed = KagiHTMLParser().parse(sample_html_description)

               image_url = parsed["image_url"]
               assert image_url is not None
               assert image_url.startswith("https://kagiproxy.com/img/")

               image_alt = parsed["image_alt"]
               assert image_alt is not None
               assert "Trump" in image_alt

           def test_parse_highlights(self, sample_html_description):
               """Exactly two highlights are returned, in document order."""
               parsed = KagiHTMLParser().parse(sample_html_description)
               highlights = parsed["highlights"]

               assert len(highlights) == 2
               assert "Itinerary details" in highlights[0]
               assert "APEC context" in highlights[1]

           def test_parse_quote(self, sample_html_description):
               """The blockquote is split into its text and attribution."""
               parsed = KagiHTMLParser().parse(sample_html_description)
               quote = parsed["quote"]

               assert quote is not None
               assert quote["text"] == "Work out a lot of our doubts and questions"
               assert quote["attribution"] == "President Trump"

           def test_parse_perspectives(self, sample_html_description):
               """Two perspectives are returned with actor, description and source URL."""
               parsed = KagiHTMLParser().parse(sample_html_description)
               perspectives = parsed["perspectives"]

               assert len(perspectives) == 2

               # First entry is checked field by field.
               lead = perspectives[0]
               assert lead["actor"] == "President Trump"
               assert "fentanyl" in lead["description"]
               assert lead["source_url"] == "https://www.straitstimes.com/world/united-states/trump-to-meet-xi-in-south-korea-on-oct-30-as-part-of-asia-swing"

               # Second entry is only checked for its actor.
               assert "White House" in perspectives[1]["actor"]

           def test_parse_sources(self, sample_html_description):
               """At least two sources are returned; the first has title, URL and domain."""
               parsed = KagiHTMLParser().parse(sample_html_description)
               sources = parsed["sources"]

               assert len(sources) >= 2

               lead_source = sources[0]
               assert lead_source["title"] == "Trump to meet Xi in South Korea on Oct 30 as part of Asia swing"
               assert lead_source["url"].startswith("https://www.straitstimes.com")
               assert lead_source["domain"] == "straitstimes.com"

           def test_parse_missing_sections(self):
               """A lone paragraph yields a summary plus empty or None for everything else."""
               html_minimal = "<p>Just a summary, no other sections.</p>"

               parsed = KagiHTMLParser().parse(html_minimal)

               assert parsed["summary"] == "Just a summary, no other sections."
               # List-valued sections come back as empty lists.
               for list_key in ("highlights", "perspectives", "sources"):
                   assert parsed[list_key] == []
               # Single-valued sections come back as None.
               assert parsed["quote"] is None
               assert parsed["image_url"] is None

           def test_parse_to_kagi_story(self, sample_html_description):
               """parse_to_story combines RSS item fields and parsed HTML into a KagiStory."""
               # Stand-in for the fields of a full RSS item.
               item_fields = {
                   "title": "Trump to meet Xi in South Korea on Oct 30",
                   "link": "https://kite.kagi.com/test/world/10",
                   "guid": "https://kite.kagi.com/test/world/10",
                   "pub_date": datetime(2025, 10, 23, 20, 56, 0),
                   "categories": ["World", "World/Diplomacy"],
                   "html_description": sample_html_description,
               }

               story = KagiHTMLParser().parse_to_story(**item_fields)

               assert isinstance(story, KagiStory)
               assert story.title == "Trump to meet Xi in South Korea on Oct 30"
               assert story.link == "https://kite.kagi.com/test/world/10"
               assert len(story.highlights) == 2
               assert len(story.perspectives) == 2
               assert len(story.sources) >= 2
               assert story.quote is not None
               assert story.image_url is not None

+460

aggregators/kagi-news/tests/test_main.py

···

       1
       1
       +
       """

     

       2
       2
       +
       Tests for Main Orchestration Script.

     

       3
       3
       +
       

     

       4
       4
       +
       Tests the complete flow: fetch → parse → format → dedupe → post → update state.

     

       5
       5
       +
       """

     

       6
       6
       +
       import pytest

     

       7
       7
       +
       from pathlib import Path

     

       8
       8
       +
       from datetime import datetime

     

       9
       9
       +
       from unittest.mock import Mock, MagicMock, patch, call

     

       10
       10
       +
       import feedparser

     

       11
       11
       +
       

     

       12
       12
       +
       from src.main import Aggregator

     

       13
       13
       +
       from src.models import KagiStory, AggregatorConfig, FeedConfig, Perspective, Quote, Source

     

       14
       14
       +
       

     

       15
       15
       +
       

     

       16
       16
       +
       @pytest.fixture
       def mock_config():
           """Provide an AggregatorConfig with two enabled feeds and one disabled feed."""
           # (name, url, community_handle, enabled) for each configured feed.
           feed_specs = [
               ("World News", "https://news.kagi.com/world.xml", "world-news.coves.social", True),
               ("Tech News", "https://news.kagi.com/tech.xml", "tech.coves.social", True),
               ("Disabled Feed", "https://news.kagi.com/disabled.xml", "disabled.coves.social", False),
           ]
           feeds = [
               FeedConfig(name=name, url=url, community_handle=handle, enabled=enabled)
               for name, url, handle, enabled in feed_specs
           ]
           return AggregatorConfig(
               coves_api_url="https://api.coves.social",
               feeds=feeds,
               log_level="info",
           )

     

       43
       43
       +
       

     

       44
       44
       +
       

     

       45
       45
       +
       @pytest.fixture
       def sample_story():
           """Provide a fully populated KagiStory for the orchestration tests."""
           story_url = "https://kite.kagi.com/test/world/1"
           perspective = Perspective(
               actor="Test Actor",
               description="Test description",
               source_url="https://example.com/source",
           )
           quote = Quote(text="Test quote", attribution="Test Author")
           source = Source(title="Source 1", url="https://example.com/1", domain="example.com")

           return KagiStory(
               title="Test Story",
               link=story_url,
               guid=story_url,  # link and guid share the same URL value
               pub_date=datetime(2024, 1, 15, 12, 0, 0),
               categories=["World"],
               summary="Test summary",
               highlights=["Highlight 1", "Highlight 2"],
               perspectives=[perspective],
               quote=quote,
               sources=[source],
               image_url="https://example.com/image.jpg",
               image_alt="Test image",
           )

     

       70
       70
       +
       

     

       71
       71
       +
       

     

       72
       72
       +
       @pytest.fixture
       def mock_rss_feed():
           """Provide a MagicMock feed with bozo=0 and two story entries."""
           first_entry = MagicMock(
               title="Story 1",
               link="https://kite.kagi.com/test/world/1",
               guid="https://kite.kagi.com/test/world/1",
               published_parsed=(2024, 1, 15, 12, 0, 0, 0, 15, 0),
               tags=[MagicMock(term="World")],
               description="<p>Story 1 description</p>",
           )
           second_entry = MagicMock(
               title="Story 2",
               link="https://kite.kagi.com/test/world/2",
               guid="https://kite.kagi.com/test/world/2",
               published_parsed=(2024, 1, 15, 13, 0, 0, 0, 15, 0),
               tags=[MagicMock(term="World")],
               description="<p>Story 2 description</p>",
           )
           return MagicMock(bozo=0, entries=[first_entry, second_entry])

     

       96
       96
       +
       

     

       97
       97
       +
       

     

       98
       98
       +
       class TestAggregator:

     

       99
       99
       +
           """Test suite for Aggregator orchestration."""

     

       100
       100
       +
       

     

       101
       101
       +
           def test_initialize_aggregator(self, mock_config, tmp_path):
               """A new Aggregator exposes the loaded config and the state file path."""
               state_path = tmp_path / "state.json"

               with patch('src.main.ConfigLoader') as loader_cls:
                   # Instantiating the patched ConfigLoader yields a loader
                   # whose load() returns the fixture config.
                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   subject = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=Mock(),
                   )

                   assert subject.config == mock_config
                   assert subject.state_file == state_path

     

       118
       118
       +
       

     

       119
       119
       +
           def test_process_enabled_feeds_only(self, mock_config, tmp_path):
               """run() fetches once per enabled feed in mock_config (two)."""
               state_path = tmp_path / "state.json"
               coves_double = Mock()

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls:

                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   fetcher_double = Mock()
                   fetcher_cls.return_value = fetcher_double

                   subject = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=coves_double,
                   )

                   # Every fetch returns a well-formed feed with no entries.
                   fetcher_double.fetch_feed.return_value = MagicMock(bozo=0, entries=[])

                   subject.run()

                   # Two of the three configured feeds are enabled.
                   assert fetcher_double.fetch_feed.call_count == 2

     

       147
       147
       +
       

     

       148
       148
       +
           def test_full_successful_flow(self, mock_config, mock_rss_feed, sample_story, tmp_path):
               """Happy path: each entry of each enabled feed is parsed, formatted and posted."""
               state_path = tmp_path / "state.json"
               coves_double = Mock()
               coves_double.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls, \
                    patch('src.main.KagiHTMLParser') as parser_cls, \
                    patch('src.main.RichTextFormatter') as formatter_cls:

                   # Wire each patched class to return a preconfigured double.
                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   fetcher_double = Mock(**{"fetch_feed.return_value": mock_rss_feed})
                   fetcher_cls.return_value = fetcher_double

                   parser_double = Mock(**{"parse_to_story.return_value": sample_story})
                   parser_cls.return_value = parser_double

                   formatted_post = {
                       "content": "Test content",
                       "facets": []
                   }
                   formatter_double = Mock(**{"format_full.return_value": formatted_post})
                   formatter_cls.return_value = formatter_double

                   subject = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=coves_double,
                   )
                   subject.run()

                   # One fetch per enabled feed.
                   assert fetcher_double.fetch_feed.call_count == 2

                   # 2 entries per feed * 2 feeds = 4 stories through every later stage.
                   assert parser_double.parse_to_story.call_count == 4
                   assert formatter_double.format_full.call_count == 4
                   assert coves_double.create_post.call_count == 4

     

       198
       198
       +
       

     

       199
       199
       +
           def test_deduplication_skips_posted_stories(self, mock_config, mock_rss_feed, sample_story, tmp_path):
               """A second run sharing the same state file posts nothing."""
               state_path = tmp_path / "state.json"
               coves_double = Mock()
               coves_double.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls, \
                    patch('src.main.KagiHTMLParser') as parser_cls, \
                    patch('src.main.RichTextFormatter') as formatter_cls:

                   # Wire each patched class to return a preconfigured double.
                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})
                   fetcher_cls.return_value = Mock(**{"fetch_feed.return_value": mock_rss_feed})
                   parser_cls.return_value = Mock(**{"parse_to_story.return_value": sample_story})
                   formatted_post = {
                       "content": "Test content",
                       "facets": []
                   }
                   formatter_cls.return_value = Mock(**{"format_full.return_value": formatted_post})

                   # First pass: every story is expected to be posted.
                   first_pass = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=coves_double,
                   )
                   first_pass.run()

                   posts_after_first_pass = coves_double.create_post.call_count
                   assert posts_after_first_pass == 4

                   # Second pass: clear the recorded calls, then run a fresh
                   # Aggregator against the same state file.
                   coves_double.reset_mock()
                   second_pass = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=coves_double,
                   )
                   second_pass.run()

                   # Every story is now a duplicate, so nothing is posted.
                   assert coves_double.create_post.call_count == 0

     

       253
       253
       +
       

     

       254
       254
       +
           def test_continue_on_feed_error(self, mock_config, tmp_path):
               """An exception from one feed fetch does not stop the remaining feeds."""
               state_path = tmp_path / "state.json"
               coves_double = Mock()

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls:

                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   # First fetch raises; second returns a well-formed empty feed.
                   fetch_outcomes = [
                       Exception("Network error"),
                       MagicMock(bozo=0, entries=[])
                   ]
                   fetcher_double = Mock(**{"fetch_feed.side_effect": fetch_outcomes})
                   fetcher_cls.return_value = fetcher_double

                   subject = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=coves_double,
                   )

                   # run() must not let the fetch exception escape.
                   subject.run()

                   # Both enabled feeds were attempted despite the first failure.
                   assert fetcher_double.fetch_feed.call_count == 2

     

       285
       285
       +
       

     

       286
       286
       +
           def test_handle_empty_feed(self, mock_config, tmp_path):
               """Feeds with no entries result in no posts."""
               state_path = tmp_path / "state.json"
               coves_double = Mock()

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls:

                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   # Every fetch returns a well-formed feed with no entries.
                   empty_feed = MagicMock(bozo=0, entries=[])
                   fetcher_cls.return_value = Mock(**{"fetch_feed.return_value": empty_feed})

                   subject = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=coves_double,
                   )
                   subject.run()

                   # Nothing to post when no feed has entries.
                   assert coves_double.create_post.call_count == 0

     

       311
       311
       +
       

     

       312
       312
       +
           def test_dont_update_state_on_failed_post(self, mock_config, mock_rss_feed, sample_story, tmp_path):
               """Test that stories whose post attempt raised are attempted again.

               Two Aggregator instances share one state file. During the first run
               every create_post call raises; the second run must call create_post
               again for the same stories, showing they were not recorded as posted.
               """
               state_path = tmp_path / "state.json"
               client = Mock(**{"create_post.side_effect": Exception("Post failed")})

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls, \
                    patch('src.main.KagiHTMLParser') as parser_cls, \
                    patch('src.main.RichTextFormatter') as formatter_cls:

                   # Replace every collaborator the aggregator instantiates
                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})
                   fetcher_cls.return_value = Mock(**{"fetch_feed.return_value": mock_rss_feed})
                   parser_cls.return_value = Mock(**{"parse_to_story.return_value": sample_story})
                   formatter_cls.return_value = Mock(**{
                       "format_full.return_value": {
                           "content": "Test content",
                           "facets": []
                       }
                   })

                   # First run: every post attempt raises
                   first_run = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=client
                   )
                   first_run.run()

                   # Clear the recorded calls so only the second run is counted.
                   # NOTE(review): reset_mock() is called without side_effect=True, so
                   # the "Post failed" side_effect stays configured and takes precedence
                   # over the return_value set below; second-run posts still raise.
                   # Confirm whether the second run was meant to succeed.
                   client.reset_mock()
                   client.create_post.return_value = "at://did:plc:test/social.coves.post/abc123"

                   # Second run against the same state file
                   second_run = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=client
                   )
                   second_run.run()

                   # Every story is attempted again because none was marked as posted.
                   # The expected total of 4 presumably comes from the feeds in
                   # mock_config times the entries in mock_rss_feed - verify against
                   # those fixtures.
                   assert client.create_post.call_count == 4

     

       365
       365
       +
       

     

       366
       366
       +
           def test_update_last_run_timestamp(self, mock_config, tmp_path):
               """Test that a run records a last_run value for each feed URL.

               The fetcher returns a feed with no entries, so nothing is posted; the
               check is only that the state manager reports a last_run for both feeds.
               """
               state_path = tmp_path / "state.json"
               client = Mock()

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls:

                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   # Well-formed feed (bozo=0) that contains no entries
                   empty_feed = MagicMock(bozo=0, entries=[])
                   fetcher_cls.return_value = Mock(**{"fetch_feed.return_value": empty_feed})

                   aggregator = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=client
                   )
                   aggregator.run()

                   # Look up last_run for both feeds before asserting on either
                   feed_urls = (
                       "https://news.kagi.com/world.xml",
                       "https://news.kagi.com/tech.xml",
                   )
                   last_runs = [
                       aggregator.state_manager.get_last_run(feed_url)
                       for feed_url in feed_urls
                   ]

                   for last_run in last_runs:
                       assert last_run is not None

     

       399
       399
       +
       

     

       400
       400
       +
           def test_create_post_with_image_embed(self, mock_config, mock_rss_feed, sample_story, tmp_path):
               """Test that create_post receives an external embed for the story.

               The client's create_external_embed is stubbed to return an embed built
               from sample_story; the test then checks that the embed keyword passed
               to create_post carries that type, URI, title and thumbnail.
               """
               state_path = tmp_path / "state.json"

               # Embed structure handed back by the stubbed create_external_embed
               stub_embed = {
                   "$type": "social.coves.embed.external",
                   "external": {
                       "uri": sample_story.link,
                       "title": sample_story.title,
                       "description": sample_story.summary,
                       "thumb": sample_story.image_url
                   }
               }
               client = Mock(**{
                   "create_post.return_value": "at://did:plc:test/social.coves.post/abc123",
                   "create_external_embed.return_value": stub_embed,
               })

               with patch('src.main.ConfigLoader') as loader_cls, \
                    patch('src.main.RSSFetcher') as fetcher_cls, \
                    patch('src.main.KagiHTMLParser') as parser_cls, \
                    patch('src.main.RichTextFormatter') as formatter_cls:

                   loader_cls.return_value = Mock(**{"load.return_value": mock_config})

                   # Keep only the first fixture entry so each fetch yields one story
                   one_entry_feed = MagicMock(bozo=0, entries=[mock_rss_feed.entries[0]])
                   fetcher_cls.return_value = Mock(**{"fetch_feed.return_value": one_entry_feed})

                   parser_cls.return_value = Mock(**{"parse_to_story.return_value": sample_story})
                   formatter_cls.return_value = Mock(**{
                       "format_full.return_value": {
                           "content": "Test content",
                           "facets": []
                       }
                   })

                   aggregator = Aggregator(
                       config_path=Path("config.yaml"),
                       state_file=state_path,
                       coves_client=client
                   )
                   aggregator.run()

                   # Inspect the keyword arguments of the most recent create_post call
                   client.create_post.assert_called()
                   posted_kwargs = client.create_post.call_args.kwargs

                   assert "embed" in posted_kwargs
                   embed = posted_kwargs["embed"]
                   assert embed["$type"] == "social.coves.embed.external"
                   assert embed["external"]["uri"] == sample_story.link
                   assert embed["external"]["title"] == sample_story.title
                   assert embed["external"]["thumb"] == sample_story.image_url

+299

aggregators/kagi-news/tests/test_richtext_formatter.py

···

       1
       1
       +
       """

     

       2
       2
       +
       Tests for Rich Text Formatter.

     

       3
       3
       +
       

     

       4
       4
       +
       Tests conversion of KagiStory to Coves rich text format with facets.

     

       5
       5
       +
       """

     

       6
       6
       +
       import pytest

     

       7
       7
       +
       from datetime import datetime

     

       8
       8
       +
       

     

       9
       9
       +
       from src.richtext_formatter import RichTextFormatter

     

       10
       10
       +
       from src.models import KagiStory, Perspective, Quote, Source

     

       11
       11
       +
       

     

       12
       12
       +
       

     

       13
       13
       +
       @pytest.fixture
       def sample_story():
           """Build a KagiStory with every section populated.

           The story carries two highlights, two perspectives, a quote, two sources
           and an image, so the formatter tests can exercise each section.
           """
           story_url = "https://kite.kagi.com/test/world/10"

           trump_perspective = Perspective(
               actor="President Trump",
               description="He said his first question to President Xi would be about fentanyl.",
               source_url="https://www.straitstimes.com/world/test"
           )
           white_house_perspective = Perspective(
               actor="White House (press secretary)",
               description="Karoline Leavitt confirmed the bilateral meeting.",
               source_url="https://www.scmp.com/news/test"
           )

           trump_quote = Quote(
               text="Work out a lot of our doubts and questions",
               attribution="President Trump"
           )

           straits_times = Source(
               title="Trump to meet Xi in South Korea",
               url="https://www.straitstimes.com/world/test",
               domain="straitstimes.com"
           )
           scmp = Source(
               title="Trump meeting Xi next Thursday",
               url="https://www.scmp.com/news/test",
               domain="scmp.com"
           )

           return KagiStory(
               title="Trump to meet Xi in South Korea",
               link=story_url,
               guid=story_url,
               pub_date=datetime(2025, 10, 23, 20, 56, 0),
               categories=["World", "World/Diplomacy"],
               summary="The White House confirmed President Trump will hold a bilateral meeting with Chinese President Xi Jinping in South Korea on October 30.",
               highlights=[
                   "Itinerary details: The Asia swing begins in Malaysia, continues to Japan.",
                   "APEC context: US officials indicated the leaders will meet on the sidelines."
               ],
               perspectives=[trump_perspective, white_house_perspective],
               quote=trump_quote,
               sources=[straits_times, scmp],
               image_url="https://kagiproxy.com/img/test123",
               image_alt="Test image"
           )

     

       58
       58
       +
       

     

       59
       59
       +
       

     

       60
       60
       +
       class TestRichTextFormatter:

     

       61
       61
       +
           """Test suite for RichTextFormatter."""

     

       62
       62
       +
       

     

       63
       63
       +
           def test_format_full_returns_content_and_facets(self, sample_story):
               """Test that format_full yields a str 'content' and a list 'facets'."""
               result = RichTextFormatter().format_full(sample_story)

               expected_types = {'content': str, 'facets': list}

               # Both keys must exist before their types are checked
               for key in expected_types:
                   assert key in result
               for key, expected_type in expected_types.items():
                   assert isinstance(result[key], expected_type)

     

       72
       72
       +
       

     

       73
       73
       +
           def test_content_structure(self, sample_story):
               """Test that the formatted content contains every expected section."""
               content = RichTextFormatter().format_full(sample_story)['content']

               # Summary, section headers, quote text and attribution footer
               expected_fragments = (
                   sample_story.summary,
                   "Highlights:",
                   "Perspectives:",
                   "Sources:",
                   sample_story.quote.text,
                   "📰 Story aggregated by Kagi News",
               )
               for fragment in expected_fragments:
                   assert fragment in content

     

       86
       86
       +
       

     

       87
       87
       +
           def test_facets_for_bold_headers(self, sample_story):
               """Test that section headers have bold facets.

               The "Highlights:" header must be fully covered by a bold facet.
               Facet indices are byte offsets (byteStart/byteEnd), so the header's
               character position is converted to UTF-8 byte offsets before it is
               compared, matching the other facet-position tests in this class.
               """
               formatter = RichTextFormatter()
               result = formatter.format_full(sample_story)

               # Find bold facets
               bold_facets = [
                   f for f in result['facets']
                   if any(feat.get('$type') == 'social.coves.richtext.facet#bold'
                          for feat in f['features'])
               ]

               assert len(bold_facets) > 0

               # Locate the "Highlights:" header; fail clearly if it is missing
               # instead of comparing facets against a bogus -1 position.
               content = result['content']
               header = "Highlights:"
               header_char_pos = content.find(header)
               assert header_char_pos != -1, "Highlights: header missing from content"

               # Convert character positions to byte positions
               header_byte_start = len(content[:header_char_pos].encode('utf-8'))
               header_byte_end = len(content[:header_char_pos + len(header)].encode('utf-8'))

               # Should have a bold facet covering "Highlights:"
               has_highlights_bold = any(
                   f['index']['byteStart'] <= header_byte_start and
                   f['index']['byteEnd'] >= header_byte_end
                   for f in bold_facets
               )
               assert has_highlights_bold

     

       112
       112
       +
       

     

       113
       113
       +
           def test_facets_for_italic_quote(self, sample_story):
               """Test that the quoted text is covered by an italic facet."""
               result = RichTextFormatter().format_full(sample_story)
               content = result['content']

               # Keep only facets carrying an italic feature
               italic_type = 'social.coves.richtext.facet#italic'
               italic_facets = []
               for facet in result['facets']:
                   if any(feat.get('$type') == italic_type for feat in facet['features']):
                       italic_facets.append(facet)

               assert len(italic_facets) > 0

               # The test looks for the quote text wrapped in double quotes
               quoted_text = f'"{sample_story.quote.text}"'
               char_start = content.find(quoted_text)
               char_end = char_start + len(quoted_text)

               # Facet indices are byte offsets, so measure the UTF-8 encoded prefixes
               byte_start = len(content[:char_start].encode('utf-8'))
               byte_end = len(content[:char_end].encode('utf-8'))

               covering = [
                   facet for facet in italic_facets
                   if facet['index']['byteStart'] <= byte_start
                   and facet['index']['byteEnd'] >= byte_end
               ]
               assert bool(covering)

     

       142
       142
       +
       

     

       143
       143
       +
           def test_facets_for_links(self, sample_story):
               """Test that link facets exist and cover every source URL."""
               result = RichTextFormatter().format_full(sample_story)

               # Keep only facets carrying a link feature
               link_type = 'social.coves.richtext.facet#link'
               link_facets = []
               for facet in result['facets']:
                   if any(feat.get('$type') == link_type for feat in facet['features']):
                       link_facets.append(facet)

               # Should have links for: 2 sources + 2 perspectives + 1 Kagi News link = 5 minimum
               assert len(link_facets) >= 5

               # Every source URL must appear as the uri of some link-facet feature
               linked_uris = [
                   feat.get('uri')
                   for facet in link_facets
                   for feat in facet['features']
               ]
               for source in sample_story.sources:
                   url = source.url
                   has_link = url in linked_uris
                   assert has_link, f"Missing link facet for {url}"

     

       166
       166
       +
       

     

       167
       167
       +
           def test_utf8_byte_positions(self):
               """Test that facet byte ranges stay in bounds for multi-byte content."""
               # Emoji and CJK characters occupy several bytes each in UTF-8
               multibyte_story = KagiStory(
                   title="Test 👋 Story",
                   link="https://test.com",
                   guid="https://test.com",
                   pub_date=datetime.now(),
                   categories=["Test"],
                   summary="Hello 世界 this is a test with emoji 🎉",
                   highlights=["Test highlight"],
                   perspectives=[],
                   quote=None,
                   sources=[],
               )

               result = RichTextFormatter().format_full(multibyte_story)
               content = result['content']

               # At least one of the emoji must appear in the output
               assert "👋" in content or "🎉" in content

               # Each facet must describe a non-empty range inside the encoded content
               total_bytes = len(content.encode('utf-8'))
               for facet in result['facets']:
                   index = facet['index']
                   byte_start = index['byteStart']
                   byte_end = index['byteEnd']

                   assert 0 <= byte_start < total_bytes
                   assert byte_start < byte_end <= total_bytes

     

       198
       198
       +
       

     

       199
       199
       +
           def test_format_story_without_optional_fields(self):
               """Test formatting a story that has only a summary.

               Highlights, perspectives and sources are empty and the quote is None;
               the output must still be non-empty and must omit the "Highlights:"
               and "Perspectives:" headers.
               """
               bare_story = KagiStory(
                   title="Minimal Story",
                   link="https://test.com",
                   guid="https://test.com",
                   pub_date=datetime.now(),
                   categories=["Test"],
                   summary="Just a summary.",
                   highlights=[],
                   perspectives=[],
                   quote=None,
                   sources=[],
               )

               result = RichTextFormatter().format_full(bare_story)
               content = result['content']

               # Content and facets must both be non-empty
               assert content
               assert result['facets']

               # The summary is still rendered
               assert "Just a summary." in content

               # Headers for the empty sections are left out
               for absent_header in ("Highlights:", "Perspectives:"):
                   assert absent_header not in content

     

       227
       227
       +
       

     

       228
       228
       +
           def test_perspective_actor_is_bolded(self, sample_story):
               """Test that perspective actor names are bolded.

               Looks for the "President Trump:" label after the "Perspectives:"
               header and requires a bold facet covering its UTF-8 byte range. A
               missing header or label fails the test rather than skipping the check.
               """
               formatter = RichTextFormatter()
               result = formatter.format_full(sample_story)

               content = result['content']
               bold_facets = [
                   f for f in result['facets']
                   if any(feat.get('$type') == 'social.coves.richtext.facet#bold'
                          for feat in f['features'])
               ]

               # Find "President Trump:" in perspectives section
               actor = "President Trump:"
               perspectives_start = content.find("Perspectives:")
               # A -1 here would make the next find() start from the last character.
               assert perspectives_start != -1, "Perspectives: header missing from content"

               actor_char_pos = content.find(actor, perspectives_start)
               # Fail when the label is absent so the test cannot pass vacuously.
               assert actor_char_pos != -1, f"{actor!r} not found in perspectives section"

               # Convert character position to byte position
               actor_byte_start = len(content[:actor_char_pos].encode('utf-8'))
               actor_byte_end = len(content[:actor_char_pos + len(actor)].encode('utf-8'))

               has_actor_bold = any(
                   f['index']['byteStart'] <= actor_byte_start and
                   f['index']['byteEnd'] >= actor_byte_end
                   for f in bold_facets
               )
               assert has_actor_bold

     

       256
       256
       +
       

     

       257
       257
       +
           def test_kagi_attribution_link(self, sample_story):
               """Check that some link facet points at the story's own Kagi URL."""
               rendered = RichTextFormatter().format_full(sample_story)
               link_type = 'social.coves.richtext.facet#link'

               # Pass 1: keep only facets that carry at least one link feature.
               candidates = []
               for facet in rendered['facets']:
                   if any(feature.get('$type') == link_type
                          for feature in facet['features']):
                       candidates.append(facet)

               # Pass 2: one of those facets must target sample_story.link.
               points_at_story = False
               for facet in candidates:
                   if any(feature.get('uri') == sample_story.link
                          for feature in facet['features']):
                       points_at_story = True
                       break

               assert points_at_story, "Missing link to Kagi story in attribution"

     

       275
       275
       +
       

     

       276
       276
       +
           def test_facets_do_not_overlap(self, sample_story):
               """Verify that no two facets of the same feature type share bytes."""
               rendered = RichTextFormatter().format_full(sample_story)

               # Bucket facets under the '$type' of every feature they carry.
               buckets = {}
               for facet in rendered['facets']:
                   for feature in facet['features']:
                       buckets.setdefault(feature['$type'], []).append(facet)

               # Compare every unordered pair inside each bucket.
               for ftype, group in buckets.items():
                   for position, first in enumerate(group):
                       for second in group[position + 1:]:
                           first_start = first['index']['byteStart']
                           first_end = first['index']['byteEnd']
                           second_start = second['index']['byteStart']
                           second_end = second['index']['byteEnd']

                           # Two ranges are disjoint when one ends at or
                           # before the point where the other begins.
                           disjoint = first_end <= second_start or second_end <= first_start
                           assert disjoint, f"Overlapping facets of type {ftype}: {first} and {second}"

+91

aggregators/kagi-news/tests/test_rss_fetcher.py

···

       1
       1
       +
       """

     

       2
       2
       +
       Tests for RSS feed fetching functionality.

     

       3
       3
       +
       """

     

       4
       4
       +
       import pytest

     

       5
       5
       +
       import responses

     

       6
       6
       +
       from pathlib import Path

     

       7
       7
       +
       

     

       8
       8
       +
       from src.rss_fetcher import RSSFetcher

     

       9
       9
       +
       

     

       10
       10
       +
       

     

       11
       11
       +
       @pytest.fixture
       def sample_rss_feed():
           """Return a minimal inline RSS 2.0 feed for the fetcher tests.

           The document has one channel titled "Kagi News - World" containing
           a single item titled "Test Story"; the tests assert on exactly
           those values.

           Returns:
               The feed as an XML string. The XML declaration sits at the very
               start of the string, with no leading whitespace.
           """
           # NOTE: an on-disk fixture path (fixtures/world.xml) used to be built
           # here but was never read; the inline feed keeps the tests
           # self-contained.
           return """<?xml version='1.0' encoding='UTF-8'?>
       <rss version="2.0">
         <channel>
           <title>Kagi News - World</title>
           <item>
             <title>Test Story</title>
             <link>https://kite.kagi.com/test/world/1</link>
             <guid>https://kite.kagi.com/test/world/1</guid>
             <pubDate>Fri, 24 Oct 2025 12:00:00 +0000</pubDate>
             <category>World</category>
           </item>
         </channel>
       </rss>"""

     

       29
       29
       +
       

     

       30
       30
       +
       

     

       31
       31
       +
       class TestRSSFetcher:
           """Tests for RSSFetcher.fetch_feed with HTTP mocked by ``responses``."""

           # Endpoint every mocked response in this class is registered against.
           FEED_URL = "https://news.kagi.com/world.xml"

           @responses.activate
           def test_fetch_feed_success(self, sample_rss_feed):
               """A 200 reply carrying the sample feed is parsed into title and entries."""
               responses.add(responses.GET, self.FEED_URL, body=sample_rss_feed, status=200)

               parsed = RSSFetcher().fetch_feed(self.FEED_URL)

               assert parsed is not None
               assert parsed.feed.title == "Kagi News - World"
               assert len(parsed.entries) == 1
               assert parsed.entries[0].title == "Test Story"

           @responses.activate
           def test_fetch_feed_timeout(self):
               """A 408 reply makes fetch_feed raise."""
               responses.add(responses.GET, self.FEED_URL, body="timeout", status=408)

               fetcher = RSSFetcher(timeout=5)

               # NOTE(review): the timeout is simulated with an HTTP 408 status,
               # not a socket-level timeout, and any Exception satisfies the
               # check - confirm this is the intended contract.
               with pytest.raises(Exception):
                   fetcher.fetch_feed(self.FEED_URL)

           @responses.activate
           def test_fetch_feed_with_retry(self, sample_rss_feed):
               """A 500 followed by a 200 still yields the parsed feed."""
               # Responses registered for the same URL are served in order:
               # the failure first, then the valid feed.
               responses.add(responses.GET, self.FEED_URL, body="error", status=500)
               responses.add(responses.GET, self.FEED_URL, body=sample_rss_feed, status=200)

               fetcher = RSSFetcher(max_retries=2)
               parsed = fetcher.fetch_feed(self.FEED_URL)

               assert parsed is not None
               assert len(parsed.entries) == 1

           @responses.activate
           def test_fetch_feed_invalid_xml(self):
               """A 200 reply with a non-XML body is returned with the bozo flag set."""
               responses.add(responses.GET, self.FEED_URL, body="Not valid XML!", status=200)

               parsed = RSSFetcher().fetch_feed(self.FEED_URL)

               # feedparser is lenient and reports malformed input through
               # ``bozo`` (1 meaning True) instead of raising.
               assert parsed.bozo == 1

           def test_fetch_feed_requires_url(self):
               """An empty URL is rejected with ValueError or TypeError."""
               fetcher = RSSFetcher()

               with pytest.raises((ValueError, TypeError)):
                   fetcher.fetch_feed("")

+227

aggregators/kagi-news/tests/test_state_manager.py

···

       1
       1
       +
       """

     

       2
       2
       +
       Tests for State Manager.

     

       3
       3
       +
       

     

       4
       4
       +
       Tests deduplication state tracking and persistence.

     

       5
       5
       +
       """

     

       6
       6
       +
       import pytest

     

       7
       7
       +
       import json

     

       8
       8
       +
       import tempfile

     

       9
       9
       +
       from pathlib import Path

     

       10
       10
       +
       from datetime import datetime, timedelta

     

       11
       11
       +
       

     

       12
       12
       +
       from src.state_manager import StateManager

     

       13
       13
       +
       

     

       14
       14
       +
       

     

       15
       15
       +
       @pytest.fixture
       def temp_state_file():
           """Yield the path of an empty temporary ``.json`` file, removed on teardown."""
           # delete=False keeps the file on disk after the handle is closed, so
           # the test can reopen it by path.
           handle = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
           handle.close()
           state_path = Path(handle.name)

           yield state_path

           # Teardown: the test may already have removed the file itself.
           if state_path.exists():
               state_path.unlink()

     

       24
       24
       +
       

     

       25
       25
       +
       

     

       26
       26
       +
       class TestStateManager:
           """Tests for StateManager: posted-GUID tracking, persistence and cleanup."""

           # Feed URL shared by the single-feed tests.
           WORLD_FEED = "https://news.kagi.com/world.xml"
           # GUID / post URI pair reused wherever one posted item is enough.
           STORY_GUID = "https://kite.kagi.com/test/world/1"
           POST_URI = "at://did:plc:test/social.coves.post/abc123"

           def test_initialize_new_state_file(self, temp_state_file):
               """Constructing a manager leaves a state file holding an empty 'feeds' map."""
               # Built only for its effect on the state file.
               manager = StateManager(temp_state_file)

               assert temp_state_file.exists()
               on_disk = json.loads(temp_state_file.read_text())
               assert 'feeds' in on_disk
               assert on_disk['feeds'] == {}

           def test_is_posted_returns_false_for_new_guid(self, temp_state_file):
               """A GUID that was never marked is reported as not posted."""
               tracker = StateManager(temp_state_file)

               assert not tracker.is_posted(self.WORLD_FEED, self.STORY_GUID)

           def test_mark_posted_stores_guid(self, temp_state_file):
               """After mark_posted, is_posted is true for the same feed and GUID."""
               tracker = StateManager(temp_state_file)

               tracker.mark_posted(self.WORLD_FEED, self.STORY_GUID, self.POST_URI)

               assert tracker.is_posted(self.WORLD_FEED, self.STORY_GUID)

           def test_state_persists_across_instances(self, temp_state_file):
               """A GUID marked by one instance is visible to a later instance on the same file."""
               writer = StateManager(temp_state_file)
               writer.mark_posted(self.WORLD_FEED, self.STORY_GUID, self.POST_URI)

               # A fresh instance has nothing but the file to learn from.
               reader = StateManager(temp_state_file)
               assert reader.is_posted(self.WORLD_FEED, self.STORY_GUID)

           def test_track_last_run_timestamp(self, temp_state_file):
               """update_last_run followed by get_last_run round-trips the timestamp."""
               tracker = StateManager(temp_state_file)
               recorded_at = datetime.now()

               tracker.update_last_run(self.WORLD_FEED, recorded_at)
               read_back = tracker.get_last_run(self.WORLD_FEED)

               assert read_back is not None
               # Tolerate sub-second drift introduced by serialization.
               drift = abs((read_back - recorded_at).total_seconds())
               assert drift < 1

           def test_get_last_run_returns_none_for_new_feed(self, temp_state_file):
               """get_last_run is None for a feed that has no recorded run."""
               tracker = StateManager(temp_state_file)

               assert tracker.get_last_run(self.WORLD_FEED) is None

           def test_cleanup_old_guids(self, temp_state_file):
               """cleanup_old_entries drops a 31-day-old GUID and keeps a fresh one."""
               stale_guid = 'https://kite.kagi.com/test/world/old'

               tracker = StateManager(temp_state_file)
               tracker.mark_posted(self.WORLD_FEED, self.STORY_GUID, "at://test/1")

               # Inject a stale entry directly into the file, back-dated 31 days.
               stale_stamp = (datetime.now() - timedelta(days=31)).isoformat()
               on_disk = json.loads(temp_state_file.read_text())
               stale_entry = {
                   'guid': stale_guid,
                   'post_uri': 'at://test/old',
                   'posted_at': stale_stamp,
               }
               on_disk['feeds'][self.WORLD_FEED]['posted_guids'].append(stale_entry)
               temp_state_file.write_text(json.dumps(on_disk, indent=2))

               # Reload so the injected entry is picked up, then clean.
               tracker = StateManager(temp_state_file)
               tracker.cleanup_old_entries(self.WORLD_FEED)

               assert tracker.is_posted(self.WORLD_FEED, self.STORY_GUID)
               assert not tracker.is_posted(self.WORLD_FEED, stale_guid)

           def test_limit_guids_to_100_per_feed(self, temp_state_file):
               """After 150 posts and a cleanup, only the 100 newest GUIDs remain."""
               tracker = StateManager(temp_state_file)

               for number in range(150):
                   tracker.mark_posted(
                       self.WORLD_FEED,
                       f"https://kite.kagi.com/test/world/{number}",
                       f"at://test/{number}",
                   )

               tracker.cleanup_old_entries(self.WORLD_FEED)

               # Re-read through a new instance and straight from the file.
               tracker = StateManager(temp_state_file)
               on_disk = json.loads(temp_state_file.read_text())
               assert len(on_disk['feeds'][self.WORLD_FEED]['posted_guids']) == 100

               # GUIDs 0-49 are the oldest and must be gone.
               assert not tracker.is_posted(self.WORLD_FEED, "https://kite.kagi.com/test/world/0")
               assert not tracker.is_posted(self.WORLD_FEED, "https://kite.kagi.com/test/world/49")

               # Newer GUIDs must survive.
               assert tracker.is_posted(self.WORLD_FEED, "https://kite.kagi.com/test/world/149")
               assert tracker.is_posted(self.WORLD_FEED, "https://kite.kagi.com/test/world/100")

           def test_multiple_feeds_tracked_separately(self, temp_state_file):
               """A GUID posted under one feed is not reported as posted under another."""
               tracker = StateManager(temp_state_file)

               world_feed = "https://news.kagi.com/world.xml"
               tech_feed = "https://news.kagi.com/tech.xml"
               world_guid = "https://kite.kagi.com/test/world/1"
               tech_guid = "https://kite.kagi.com/test/tech/1"

               tracker.mark_posted(world_feed, world_guid, "at://test/1")
               tracker.mark_posted(tech_feed, tech_guid, "at://test/2")

               assert tracker.is_posted(world_feed, world_guid)
               assert not tracker.is_posted(world_feed, tech_guid)

               assert tracker.is_posted(tech_feed, tech_guid)
               assert not tracker.is_posted(tech_feed, world_guid)

           def test_get_posted_count(self, temp_state_file):
               """get_posted_count starts at 0 and reaches 5 after five posts."""
               tracker = StateManager(temp_state_file)

               assert tracker.get_posted_count(self.WORLD_FEED) == 0

               for number in range(5):
                   tracker.mark_posted(self.WORLD_FEED, f"guid-{number}", f"post-{number}")

               assert tracker.get_posted_count(self.WORLD_FEED) == 5

           def test_state_file_format_is_valid_json(self, temp_state_file):
               """After writes, the state file parses as JSON with the expected keys."""
               tracker = StateManager(temp_state_file)

               tracker.mark_posted(self.WORLD_FEED, "test-guid", "test-post-uri")
               tracker.update_last_run(self.WORLD_FEED, datetime.now())

               # json.loads raises if the file is not valid JSON.
               on_disk = json.loads(temp_state_file.read_text())

               assert 'feeds' in on_disk
               assert self.WORLD_FEED in on_disk['feeds']
               feed_state = on_disk['feeds'][self.WORLD_FEED]
               assert 'posted_guids' in feed_state
               assert 'last_successful_run' in feed_state

           def test_automatic_cleanup_on_mark_posted(self, temp_state_file):
               """mark_posted on a reloaded manager drops a pre-existing 31-day-old entry."""
               tracker = StateManager(temp_state_file)

               # Overwrite the file with a single entry back-dated 31 days.
               stale_stamp = (datetime.now() - timedelta(days=31)).isoformat()
               stale_entry = {
                   'guid': 'old-guid',
                   'post_uri': 'old-uri',
                   'posted_at': stale_stamp,
               }
               seeded_state = {
                   'feeds': {
                       self.WORLD_FEED: {
                           'posted_guids': [stale_entry],
                           'last_successful_run': None,
                       }
                   }
               }
               temp_state_file.write_text(json.dumps(seeded_state, indent=2))

               # Reload, then post a new item; this is expected to trigger cleanup.
               tracker = StateManager(temp_state_file)
               tracker.mark_posted(self.WORLD_FEED, "new-guid", "new-uri")

               assert not tracker.is_posted(self.WORLD_FEED, "old-guid")
               assert tracker.is_posted(self.WORLD_FEED, "new-guid")