Atom feed for our EEG site
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "feedparser",
#     "feedgenerator",
#     "requests",
# ]
# ///
# Do not delete the block above: it's the inline metadata that `uv run` needs.

import calendar
import datetime
import json
import os
import sys

import feedparser
import requests
from feedgenerator import Atom1Feed

def load_feed_urls(file_path):
    """Read feed.json and return the list of feed URLs."""
    with open(file_path, 'r') as f:
        data = json.load(f)
    return [item['url'] for item in data]

def load_mapping(file_path):
    """Read mapping.json (URL -> author/site info); a missing file means no mapping."""
    if not os.path.exists(file_path):
        return {}

    with open(file_path, 'r') as f:
        return json.load(f)
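
# For reference, a sketch of the two input files as this script reads them.
# The field names are inferred from the loaders above; the URLs below are
# placeholders, not real feeds.
#
#   feed.json -- a list of objects, each carrying a "url" key:
#     [
#       {"url": "https://example.com/feed.atom"},
#       {"url": "https://example.org/rss.xml"}
#     ]
#
#   mapping.json -- an object keyed by feed URL; "name" is used for author
#   attribution and "site" may optionally replace the feed title:
#     {
#       "https://example.com/feed.atom": {"name": "Alice", "site": "Alice's Blog"}
#     }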

def get_feed_data(url, mapping):
    """Fetch and parse one feed; returns None on any fetch or parse error."""
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        feed_data = feedparser.parse(response.content)
        print(f"Fetched {url}: found {len(feed_data.entries)} entries", file=sys.stderr)

        # Attach the author/site mapping (if any) so extract_entries can use it
        feed_data.mapping = mapping.get(url)

        return feed_data
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None

def extract_entries(feeds):
    """Flatten all fetched feeds into one list of entry dicts, newest first."""
    all_entries = []
    for feed_data in feeds:
        if not feed_data or not hasattr(feed_data, 'entries'):
            continue

        # Get feed title and handle mapping
        feed_title = feed_data.feed.get('title', 'Unknown Source')
        author_name = 'Unknown'

        if hasattr(feed_data, 'mapping') and feed_data.mapping:
            author_name = feed_data.mapping.get('name', 'Unknown')
            # Optionally use mapped site name
            # feed_title = feed_data.mapping.get('site', feed_title)

        print(f"Processing feed: {feed_title} ({len(feed_data.entries)} entries)", file=sys.stderr)

        for entry in feed_data.entries:
            # Get publication date. feedparser normalizes *_parsed fields to
            # UTC struct_time, so convert with calendar.timegm (UTC) rather
            # than time.mktime, which would misread them as local time.
            pub_date = None
            if getattr(entry, 'published_parsed', None):
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.published_parsed), tz=datetime.timezone.utc)
            elif getattr(entry, 'updated_parsed', None):
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.updated_parsed), tz=datetime.timezone.utc)

            if not pub_date:
                print(f"Skipping entry without date: {entry.get('title', 'Unknown')}", file=sys.stderr)
                continue

            # Title and link, with fallbacks
            title = entry.get('title', 'No title')
            link = entry.get('link', '')

            # Prefer full content over the summary when available
            if hasattr(entry, 'content') and entry.content:
                content = entry.content[0].value
            else:
                content = entry.get('summary', '')

            # Unique ID, falling back to the link
            entry_id = entry.get('id', link)

            all_entries.append({
                'title': title,
                'link': link,
                'content': content,
                'author': author_name,
                'pub_date': pub_date,
                'feed_title': feed_title,
                'id': entry_id
            })

    # Sort by publication date (newest first)
    sorted_entries = sorted(all_entries, key=lambda x: x['pub_date'], reverse=True)
    print(f"Total entries after sorting: {len(sorted_entries)}", file=sys.stderr)
    return sorted_entries

def create_atom_feed(entries):
    """Build the aggregated Atom feed from the sorted entry dicts."""
    feed = Atom1Feed(
        title="Atomic EEG",
        link="https://example.com/",  # Placeholder link
        description="Aggregated Atom feeds",
        language="en",
        author_name="Feed Aggregator",
        feed_url="https://example.com/eeg.xml"  # Placeholder feed URL
    )

    for entry in entries:
        feed.add_item(
            title=entry['title'],
            link=entry['link'],
            description=entry['content'],
            author_name=entry['author'],
            pubdate=entry['pub_date'],
            unique_id=entry['id'],
            categories=[entry['feed_title']]  # Use feed title as category for attribution
        )

    return feed
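
# Note: Atom1Feed writes items in the order they were added, so the
# newest-first sort in extract_entries is what determines the order of
# entries in the output file.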

def main():
    # Load feed URLs
    feed_urls = load_feed_urls('feed.json')

    # Load mapping
    mapping = load_mapping('mapping.json')

    # Fetch feed data
    print(f"Fetching {len(feed_urls)} feeds...", file=sys.stderr)
    feeds = []
    for url in feed_urls:
        feed_data = get_feed_data(url, mapping)
        if feed_data:
            feeds.append(feed_data)

    # Extract and sort entries
    print("Processing entries...", file=sys.stderr)
    entries = extract_entries(feeds)
    print(f"Found {len(entries)} entries to include in feed", file=sys.stderr)

    # Create aggregated feed
    feed = create_atom_feed(entries)

    # Write to file (text mode with explicit UTF-8, matching the feed encoding)
    with open('eeg.xml', 'w', encoding='utf-8') as f:
        feed.write(f, 'utf-8')

    print("Feed successfully written to eeg.xml", file=sys.stderr)

if __name__ == "__main__":
    main()
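
# To run this script (a sketch; "eeg_feed.py" is an assumed filename --
# use whatever you saved it as):
#
#   uv run eeg_feed.py
#
# uv reads the inline metadata block at the top and supplies feedparser,
# feedgenerator, and requests in an ephemeral environment. A plain
# `python eeg_feed.py` also works if those packages are already installed.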