A community-based topic aggregation platform built on atproto
at main 8.6 kB view raw
"""
Tests for State Manager.

Tests deduplication state tracking and persistence.
"""
import json
import tempfile
from datetime import datetime, timedelta
from pathlib import Path

import pytest

from src.state_manager import StateManager


@pytest.fixture
def temp_state_file():
    """Create a temporary state file for testing.

    Yields:
        Path to an existing, empty ``.json`` file. The file is removed
        after the test if it is still present.
    """
    # delete=False keeps the file on disk after the handle is closed, so the
    # test (and StateManager) can reopen it by path.
    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
        temp_path = Path(f.name)
    yield temp_path
    # Cleanup
    if temp_path.exists():
        temp_path.unlink()


class TestStateManager:
    """Test suite for StateManager."""

    def test_initialize_new_state_file(self, temp_state_file):
        """Test initializing a new state file."""
        # Constructed only for its side effect on the state file.
        StateManager(temp_state_file)

        # Should create an empty state. The fixture already created the file
        # itself, so the JSON content checks below are the meaningful ones.
        assert temp_state_file.exists()
        state = json.loads(temp_state_file.read_text())
        assert 'feeds' in state
        assert state['feeds'] == {}

    def test_is_posted_returns_false_for_new_guid(self, temp_state_file):
        """Test that is_posted returns False for new GUIDs."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"
        guid = "https://kite.kagi.com/test/world/1"

        assert not manager.is_posted(feed_url, guid)

    def test_mark_posted_stores_guid(self, temp_state_file):
        """Test that mark_posted stores GUIDs."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"
        guid = "https://kite.kagi.com/test/world/1"
        post_uri = "at://did:plc:test/social.coves.post/abc123"

        manager.mark_posted(feed_url, guid, post_uri)

        # Should now return True
        assert manager.is_posted(feed_url, guid)

    def test_state_persists_across_instances(self, temp_state_file):
        """Test that state persists when creating new instances."""
        feed_url = "https://news.kagi.com/world.xml"
        guid = "https://kite.kagi.com/test/world/1"
        post_uri = "at://did:plc:test/social.coves.post/abc123"

        # First instance marks as posted
        manager1 = StateManager(temp_state_file)
        manager1.mark_posted(feed_url, guid, post_uri)

        # Second instance should see the same state
        manager2 = StateManager(temp_state_file)
        assert manager2.is_posted(feed_url, guid)

    def test_track_last_run_timestamp(self, temp_state_file):
        """Test tracking last successful run timestamp."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"
        timestamp = datetime.now()

        manager.update_last_run(feed_url, timestamp)

        retrieved = manager.get_last_run(feed_url)
        assert retrieved is not None
        # Compare timestamps (allow small difference due to serialization)
        assert abs((retrieved - timestamp).total_seconds()) < 1

    def test_get_last_run_returns_none_for_new_feed(self, temp_state_file):
        """Test that get_last_run returns None for new feeds."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"

        assert manager.get_last_run(feed_url) is None

    def test_cleanup_old_guids(self, temp_state_file):
        """Test cleanup of old GUIDs (> 30 days)."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"

        # Add recent GUID
        recent_guid = "https://kite.kagi.com/test/world/1"
        manager.mark_posted(feed_url, recent_guid, "at://test/1")

        # Manually add old GUID (> 30 days) by editing the file on disk, so
        # the entry carries a back-dated 'posted_at' value.
        old_timestamp = (datetime.now() - timedelta(days=31)).isoformat()
        state_data = json.loads(temp_state_file.read_text())
        state_data['feeds'][feed_url]['posted_guids'].append({
            'guid': 'https://kite.kagi.com/test/world/old',
            'post_uri': 'at://test/old',
            'posted_at': old_timestamp
        })
        temp_state_file.write_text(json.dumps(state_data, indent=2))

        # Reload and cleanup
        manager = StateManager(temp_state_file)
        manager.cleanup_old_entries(feed_url)

        # Recent GUID should still be there
        assert manager.is_posted(feed_url, recent_guid)

        # Old GUID should be removed
        assert not manager.is_posted(feed_url, 'https://kite.kagi.com/test/world/old')

    def test_limit_guids_to_100_per_feed(self, temp_state_file):
        """Test that only last 100 GUIDs are kept per feed."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"

        # Add 150 GUIDs
        for i in range(150):
            guid = f"https://kite.kagi.com/test/world/{i}"
            manager.mark_posted(feed_url, guid, f"at://test/{i}")

        # Cleanup (should limit to 100)
        manager.cleanup_old_entries(feed_url)

        # Reload state
        manager = StateManager(temp_state_file)

        # Should have exactly 100 entries (most recent)
        state_data = json.loads(temp_state_file.read_text())
        assert len(state_data['feeds'][feed_url]['posted_guids']) == 100

        # Oldest entries should be removed
        assert not manager.is_posted(feed_url, "https://kite.kagi.com/test/world/0")
        assert not manager.is_posted(feed_url, "https://kite.kagi.com/test/world/49")

        # Recent entries should still be there
        assert manager.is_posted(feed_url, "https://kite.kagi.com/test/world/149")
        assert manager.is_posted(feed_url, "https://kite.kagi.com/test/world/100")

    def test_multiple_feeds_tracked_separately(self, temp_state_file):
        """Test that multiple feeds are tracked independently."""
        manager = StateManager(temp_state_file)

        feed1 = "https://news.kagi.com/world.xml"
        feed2 = "https://news.kagi.com/tech.xml"
        guid1 = "https://kite.kagi.com/test/world/1"
        guid2 = "https://kite.kagi.com/test/tech/1"

        manager.mark_posted(feed1, guid1, "at://test/1")
        manager.mark_posted(feed2, guid2, "at://test/2")

        # Each feed should only know about its own GUIDs
        assert manager.is_posted(feed1, guid1)
        assert not manager.is_posted(feed1, guid2)

        assert manager.is_posted(feed2, guid2)
        assert not manager.is_posted(feed2, guid1)

    def test_get_posted_count(self, temp_state_file):
        """Test getting count of posted items per feed."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"

        # Initially 0
        assert manager.get_posted_count(feed_url) == 0

        # Add 5 items
        for i in range(5):
            manager.mark_posted(feed_url, f"guid-{i}", f"post-{i}")

        assert manager.get_posted_count(feed_url) == 5

    def test_state_file_format_is_valid_json(self, temp_state_file):
        """Test that state file is always valid JSON."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"

        manager.mark_posted(feed_url, "test-guid", "test-post-uri")
        manager.update_last_run(feed_url, datetime.now())

        # Should be valid JSON
        with open(temp_state_file) as f:
            state = json.load(f)

        assert 'feeds' in state
        assert feed_url in state['feeds']
        assert 'posted_guids' in state['feeds'][feed_url]
        assert 'last_successful_run' in state['feeds'][feed_url]

    def test_automatic_cleanup_on_mark_posted(self, temp_state_file):
        """Test that cleanup happens automatically when marking posted."""
        manager = StateManager(temp_state_file)
        feed_url = "https://news.kagi.com/world.xml"

        # Add old entry manually
        old_timestamp = (datetime.now() - timedelta(days=31)).isoformat()
        state_data = {
            'feeds': {
                feed_url: {
                    'posted_guids': [{
                        'guid': 'old-guid',
                        'post_uri': 'old-uri',
                        'posted_at': old_timestamp
                    }],
                    'last_successful_run': None
                }
            }
        }
        temp_state_file.write_text(json.dumps(state_data, indent=2))

        # Reload and add new entry (should trigger cleanup)
        manager = StateManager(temp_state_file)
        manager.mark_posted(feed_url, "new-guid", "new-uri")

        # Old entry should be gone
        assert not manager.is_posted(feed_url, "old-guid")
        assert manager.is_posted(feed_url, "new-guid")