Atom feed for our EEG site
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "feedparser",
#     "feedgenerator",
#     "requests",
# ]
# ///
# Do not delete the block above: it's the inline script metadata `uv run` needs.

import calendar
import datetime
import json
import os
import sys

import feedparser
import requests
from feedgenerator import Atom1Feed

def load_feed_urls(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    return [item['url'] for item in data]

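# A hypothetical feed.json matching the shape load_feed_urls expects: a JSON
# array of objects, each with a 'url' key (the URLs below are placeholders):
#
#     [
#         {"url": "https://example.com/posts.atom"},
#         {"url": "https://example.org/rss.xml"}
#     ]
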
def load_mapping(file_path):
    if not os.path.exists(file_path):
        return {}

    with open(file_path, 'r') as f:
        return json.load(f)

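# A hypothetical mapping.json, keyed by feed URL. Only the nested 'name' key
# is read later, where it becomes the entry author (values are placeholders):
#
#     {
#         "https://example.com/posts.atom": {"name": "Alice Example"}
#     }
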
def get_feed_data(url, mapping):
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        feed_data = feedparser.parse(response.content)
        print(f"Fetched {url}: found {len(feed_data.entries)} entries", file=sys.stderr)

        # Attach the author mapping for this URL (None when there is no entry);
        # feedparser's FeedParserDict stores attribute assignment as a dict key.
        feed_data.mapping = mapping.get(url)

        return feed_data
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None

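# Note: feedparser does not raise on malformed XML; it sets feed_data.bozo and
# salvages what it can, so a feed returned here may still be incomplete. The
# raise_for_status() call only guards the HTTP layer.
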
def extract_entries(feeds):
    all_entries = []
    for feed_data in feeds:
        if not feed_data or not hasattr(feed_data, 'entries'):
            continue

        # Get the feed title and resolve the author from the mapping, if any
        feed_title = feed_data.feed.get('title', 'Unknown Source')
        author_name = 'Unknown'

        if hasattr(feed_data, 'mapping') and feed_data.mapping:
            author_name = feed_data.mapping.get('name', 'Unknown')

        print(f"Processing feed: {feed_title} ({len(feed_data.entries)} entries)", file=sys.stderr)

        for entry in feed_data.entries:
            # Get the publication date. feedparser normalizes the *_parsed
            # struct_time fields to UTC, so convert with calendar.timegm
            # (mktime would wrongly interpret them in local time).
            pub_date = None
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.published_parsed), tz=datetime.timezone.utc)
            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.updated_parsed), tz=datetime.timezone.utc)

            if not pub_date:
                print(f"Skipping entry without date: {entry.get('title', 'Unknown')}", file=sys.stderr)
                continue

            # Get title and link, with safe fallbacks
            title = entry.get('title', 'No title')
            link = entry.get('link', '')

            # Prefer full content over the summary when both exist
            if hasattr(entry, 'content') and entry.content:
                content = entry.content[0].value
            else:
                content = entry.get('summary', '')

            # Get a unique ID, falling back to the link
            entry_id = entry.get('id', link)

            all_entries.append({
                'title': title,
                'link': link,
                'content': content,
                'author': author_name,
                'pub_date': pub_date,
                'feed_title': feed_title,
                'id': entry_id
            })

    # Sort by publication date (newest first)
    sorted_entries = sorted(all_entries, key=lambda x: x['pub_date'], reverse=True)
    print(f"Total entries after sorting: {len(sorted_entries)}", file=sys.stderr)
    return sorted_entries

def format_pubdate(pubdate):
    # Format the date with a short (three-letter) month name
    return pubdate.strftime('%d %b %Y %H:%M:%S')

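# Worked example (illustrative only, not called anywhere in the script):
#     format_pubdate(datetime.datetime(2024, 1, 5, 13, 0, 0))
# returns '05 Jan 2024 13:00:00'.
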
def create_atom_feed(entries):
    feed = Atom1Feed(
        title="Atomic EEG",
        link="https://example.com/",  # Placeholder link
        description="Aggregated Atom feeds",
        language="en",
        author_name="Feed Aggregator",
        feed_url="https://example.com/eeg.xml"  # Placeholder feed URL
    )

    for entry in entries:
        # Format the date with a short month name
        formatted_date = format_pubdate(entry['pub_date'])
        feed.add_item(
            title=entry['title'],
            link=entry['link'],
            description=entry['content'],
            author_name=entry['author'],
            pubdate=entry['pub_date'],
            unique_id=entry['id'],
            categories=[entry['feed_title']],  # Use feed title as category for attribution
            updateddate=entry['pub_date'],
            # Extra keyword arguments are stored on the item dict but are not
            # serialized into the XML, so formatted_date is metadata only.
            formatted_date=formatted_date
        )

    return feed

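# Note: in the feedgenerator port of Django's feed framework, pubdate maps to
# each entry's <published> element and updateddate to <updated>, so both
# elements carry the same timestamp here.
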
def main():
    # Load feed URLs
    feed_urls = load_feed_urls('feed.json')

    # Load the optional URL-to-author mapping
    mapping = load_mapping('mapping.json')

    # Fetch feed data
    print(f"Fetching {len(feed_urls)} feeds...", file=sys.stderr)
    feeds = []
    for url in feed_urls:
        feed_data = get_feed_data(url, mapping)
        if feed_data:
            feeds.append(feed_data)

    # Extract and sort entries
    print("Processing entries...", file=sys.stderr)
    entries = extract_entries(feeds)
    print(f"Found {len(entries)} entries to include in feed", file=sys.stderr)

    # Create the aggregated feed
    feed = create_atom_feed(entries)

    # Write to file; pin the file encoding so it matches the declared utf-8
    with open('eeg.xml', 'w', encoding='utf-8') as f:
        feed.write(f, 'utf-8')

    print("Feed successfully written to eeg.xml", file=sys.stderr)

if __name__ == "__main__":
    main()
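
# Usage sketch (the filename eeg.py is an assumption; any name works). uv reads
# the inline metadata block at the top, provisions feedparser, feedgenerator
# and requests in a throwaway environment, and the script writes eeg.xml to the
# current directory, where it also expects feed.json and mapping.json:
#
#     uv run eeg.py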