# A community based topic aggregation platform built on atproto
"""
Tests for State Manager.

Tests deduplication state tracking and persistence.
"""
6import pytest
7import json
8import tempfile
9from pathlib import Path
10from datetime import datetime, timedelta
11
12from src.state_manager import StateManager
13
14
@pytest.fixture
def temp_state_file():
    """Yield the path of a throwaway ``.json`` file and remove it afterwards.

    The file is created empty on disk with ``delete=False`` so that it
    survives being closed and can be handed to the code under test by path.
    """
    handle = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json')
    with handle:
        state_path = Path(handle.name)

    yield state_path

    # Teardown: nothing to do if the file is already gone.
    if not state_path.exists():
        return
    state_path.unlink()
24
25
class TestStateManager:
    """Behavioural tests for ``StateManager`` deduplication and persistence.

    Every test receives ``temp_state_file`` (the path of a temporary
    ``.json`` file) and builds its own ``StateManager`` on top of it.
    """

    # Sample values shared by most tests.
    WORLD_FEED = "https://news.kagi.com/world.xml"
    WORLD_GUID = "https://kite.kagi.com/test/world/1"
    SAMPLE_POST_URI = "at://did:plc:test/social.coves.post/abc123"

    @staticmethod
    def _read_state(path):
        """Parse and return the JSON document currently stored at *path*."""
        return json.loads(path.read_text())

    def test_initialize_new_state_file(self, temp_state_file):
        """Constructing a manager leaves a state file with an empty feed map."""
        # Only the on-disk effect of construction is inspected below.
        manager = StateManager(temp_state_file)

        assert temp_state_file.exists()
        on_disk = self._read_state(temp_state_file)
        assert 'feeds' in on_disk
        assert on_disk['feeds'] == {}

    def test_is_posted_returns_false_for_new_guid(self, temp_state_file):
        """A GUID that was never marked is not reported as posted."""
        tracker = StateManager(temp_state_file)

        assert not tracker.is_posted(self.WORLD_FEED, self.WORLD_GUID)

    def test_mark_posted_stores_guid(self, temp_state_file):
        """After ``mark_posted`` the same GUID is reported as posted."""
        tracker = StateManager(temp_state_file)

        tracker.mark_posted(self.WORLD_FEED, self.WORLD_GUID, self.SAMPLE_POST_URI)

        assert tracker.is_posted(self.WORLD_FEED, self.WORLD_GUID)

    def test_state_persists_across_instances(self, temp_state_file):
        """A second manager on the same file sees what the first one marked."""
        writer = StateManager(temp_state_file)
        writer.mark_posted(self.WORLD_FEED, self.WORLD_GUID, self.SAMPLE_POST_URI)

        reader = StateManager(temp_state_file)
        assert reader.is_posted(self.WORLD_FEED, self.WORLD_GUID)

    def test_track_last_run_timestamp(self, temp_state_file):
        """``update_last_run`` followed by ``get_last_run`` round-trips a timestamp."""
        tracker = StateManager(temp_state_file)
        recorded_at = datetime.now()

        tracker.update_last_run(self.WORLD_FEED, recorded_at)
        loaded = tracker.get_last_run(self.WORLD_FEED)

        assert loaded is not None
        # Tolerate sub-second drift introduced by serialization.
        drift_seconds = abs((loaded - recorded_at).total_seconds())
        assert drift_seconds < 1

    def test_get_last_run_returns_none_for_new_feed(self, temp_state_file):
        """A feed with no recorded run yields ``None`` from ``get_last_run``."""
        tracker = StateManager(temp_state_file)

        assert tracker.get_last_run(self.WORLD_FEED) is None

    def test_cleanup_old_guids(self, temp_state_file):
        """``cleanup_old_entries`` drops a 31-day-old entry and keeps a fresh one."""
        stale_guid = 'https://kite.kagi.com/test/world/old'

        # A fresh entry goes in through the public API.
        manager = StateManager(temp_state_file)
        manager.mark_posted(self.WORLD_FEED, self.WORLD_GUID, "at://test/1")

        # A stale entry (31 days old, past the 30-day window this test targets)
        # is written straight into the file.
        stale_entry = {
            'guid': stale_guid,
            'post_uri': 'at://test/old',
            'posted_at': (datetime.now() - timedelta(days=31)).isoformat(),
        }
        document = self._read_state(temp_state_file)
        document['feeds'][self.WORLD_FEED]['posted_guids'].append(stale_entry)
        temp_state_file.write_text(json.dumps(document, indent=2))

        # A new manager picks up the edited file before cleaning.
        manager = StateManager(temp_state_file)
        manager.cleanup_old_entries(self.WORLD_FEED)

        assert manager.is_posted(self.WORLD_FEED, self.WORLD_GUID)
        assert not manager.is_posted(self.WORLD_FEED, stale_guid)

    def test_limit_guids_to_100_per_feed(self, temp_state_file):
        """After 150 posts and a cleanup, only the newest 100 GUIDs remain."""
        manager = StateManager(temp_state_file)

        for index in range(150):
            manager.mark_posted(
                self.WORLD_FEED,
                f"https://kite.kagi.com/test/world/{index}",
                f"at://test/{index}",
            )

        manager.cleanup_old_entries(self.WORLD_FEED)

        # Re-open so the membership checks below reflect the persisted state.
        manager = StateManager(temp_state_file)

        document = self._read_state(temp_state_file)
        assert len(document['feeds'][self.WORLD_FEED]['posted_guids']) == 100

        # The first 50 entries (0..49) are expected to have been evicted.
        for evicted in (0, 49):
            assert not manager.is_posted(
                self.WORLD_FEED, f"https://kite.kagi.com/test/world/{evicted}"
            )

        # The last 100 entries (50..149) are expected to survive.
        for retained in (149, 100):
            assert manager.is_posted(
                self.WORLD_FEED, f"https://kite.kagi.com/test/world/{retained}"
            )

    def test_multiple_feeds_tracked_separately(self, temp_state_file):
        """GUIDs marked under one feed are not visible under another feed."""
        tracker = StateManager(temp_state_file)

        world_feed, tech_feed = self.WORLD_FEED, "https://news.kagi.com/tech.xml"
        world_guid, tech_guid = self.WORLD_GUID, "https://kite.kagi.com/test/tech/1"

        tracker.mark_posted(world_feed, world_guid, "at://test/1")
        tracker.mark_posted(tech_feed, tech_guid, "at://test/2")

        assert tracker.is_posted(world_feed, world_guid)
        assert not tracker.is_posted(world_feed, tech_guid)

        assert tracker.is_posted(tech_feed, tech_guid)
        assert not tracker.is_posted(tech_feed, world_guid)

    def test_get_posted_count(self, temp_state_file):
        """``get_posted_count`` goes from 0 to 5 after five ``mark_posted`` calls."""
        tracker = StateManager(temp_state_file)

        assert tracker.get_posted_count(self.WORLD_FEED) == 0

        for index in range(5):
            tracker.mark_posted(self.WORLD_FEED, f"guid-{index}", f"post-{index}")

        assert tracker.get_posted_count(self.WORLD_FEED) == 5

    def test_state_file_format_is_valid_json(self, temp_state_file):
        """The persisted file parses as JSON and exposes the expected keys."""
        tracker = StateManager(temp_state_file)

        tracker.mark_posted(self.WORLD_FEED, "test-guid", "test-post-uri")
        tracker.update_last_run(self.WORLD_FEED, datetime.now())

        # Parsing raises if the file is not valid JSON.
        document = self._read_state(temp_state_file)

        assert 'feeds' in document
        feeds = document['feeds']
        assert self.WORLD_FEED in feeds
        feed_record = feeds[self.WORLD_FEED]
        assert 'posted_guids' in feed_record
        assert 'last_successful_run' in feed_record

    def test_automatic_cleanup_on_mark_posted(self, temp_state_file):
        """``mark_posted`` alone is enough to evict a 31-day-old entry."""
        manager = StateManager(temp_state_file)

        # Overwrite the file with a state holding a single stale entry.
        stale_stamp = (datetime.now() - timedelta(days=31)).isoformat()
        stale_record = {
            'guid': 'old-guid',
            'post_uri': 'old-uri',
            'posted_at': stale_stamp,
        }
        seeded_state = {
            'feeds': {
                self.WORLD_FEED: {
                    'posted_guids': [stale_record],
                    'last_successful_run': None,
                },
            },
        }
        temp_state_file.write_text(json.dumps(seeded_state, indent=2))

        # Re-open on the seeded file; no explicit cleanup call is made.
        manager = StateManager(temp_state_file)
        manager.mark_posted(self.WORLD_FEED, "new-guid", "new-uri")

        assert not manager.is_posted(self.WORLD_FEED, "old-guid")
        assert manager.is_posted(self.WORLD_FEED, "new-guid")