Atom feed for our EEG site
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "feedparser",
#     "feedgenerator",
#     "requests",
# ]
# ///
# Do not delete the block above, as it's needed for `uv run`.
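#
# Usage sketch (the filename is an assumption; use whatever name this script
# is saved under):
#
#   uv run eeg_feed.py
#
# The script reads feed.json (and, if present, mapping.json) from the current
# directory, logs progress to stderr, and writes the merged feed to eeg.xml.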

import json
import feedparser
import datetime
from calendar import timegm
from feedgenerator import Atom1Feed
import requests
import sys
import os

def load_feed_urls(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    return [item['url'] for item in data]

def load_mapping(file_path):
    if not os.path.exists(file_path):
        return {}

    with open(file_path, 'r') as f:
        return json.load(f)

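# For illustration, the shapes these loaders expect (inferred from the code;
# the URLs and names below are made up):
#
#   feed.json:
#     [{"url": "https://example.com/feed.xml"}, ...]
#
#   mapping.json:
#     {"https://example.com/feed.xml": {"name": "Author Name", "site": "Site Name"}}
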
def get_feed_data(url, mapping):
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        feed_data = feedparser.parse(response.content)
        print(f"Fetched {url}: found {len(feed_data.entries)} entries", file=sys.stderr)

        # Add mapping info to feed_data
        if url in mapping:
            feed_data.mapping = mapping[url]
        else:
            feed_data.mapping = None

        return feed_data
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None

def extract_entries(feeds):
    all_entries = []
    for feed_data in feeds:
        if not feed_data or not hasattr(feed_data, 'entries'):
            continue

        # Get feed title and handle mapping
        feed_title = feed_data.feed.get('title', 'Unknown Source')
        author_name = 'Unknown'

        if hasattr(feed_data, 'mapping') and feed_data.mapping:
            author_name = feed_data.mapping.get('name', 'Unknown')
            # Optionally use the mapped site name
            # feed_title = feed_data.mapping.get('site', feed_title)

        print(f"Processing feed: {feed_title} ({len(feed_data.entries)} entries)", file=sys.stderr)

        for entry in feed_data.entries:
            # Get the publication date. feedparser normalizes parsed dates to
            # UTC struct_times, so convert with timegm rather than time.mktime
            # (which would misread them as local time).
            pub_date = None
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    timegm(entry.published_parsed), tz=datetime.timezone.utc)
            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    timegm(entry.updated_parsed), tz=datetime.timezone.utc)

            if not pub_date:
                print(f"Skipping entry without date: {entry.get('title', 'Unknown')}", file=sys.stderr)
                continue

            # Get title
            title = entry.get('title', 'No title')

            # Get link
            link = entry.get('link', '')

            # Get description/content: prefer full content, fall back to summary
            if hasattr(entry, 'content') and entry.content:
                content = entry.content[0].value
            else:
                content = entry.get('summary', '')

            # Get unique ID, falling back to the link
            entry_id = entry.get('id', link)

            all_entries.append({
                'title': title,
                'link': link,
                'content': content,
                'author': author_name,
                'pub_date': pub_date,
                'feed_title': feed_title,
                'id': entry_id
            })

    # Sort by publication date (newest first)
    sorted_entries = sorted(all_entries, key=lambda x: x['pub_date'], reverse=True)
    print(f"Total entries after sorting: {len(sorted_entries)}", file=sys.stderr)
    return sorted_entries

def format_pubdate(pubdate):
    # Format the date with a short (three-letter) month name
    return pubdate.strftime('%d %b %Y %H:%M:%S')

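# For example (a made-up timestamp; '%b' month abbreviations follow the
# active locale):
#
#   >>> format_pubdate(datetime.datetime(2024, 1, 5, 13, 45))
#   '05 Jan 2024 13:45:00'
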
def create_atom_feed(entries):
    feed = Atom1Feed(
        title="Atomic EEG",
        link="https://example.com/",  # Placeholder link
        description="Aggregated Atom feeds",
        language="en",
        author_name="Feed Aggregator",
        feed_url="https://example.com/eeg.xml"  # Placeholder feed URL
    )

    for entry in entries:
        # Format the date with a short month name
        formatted_date = format_pubdate(entry['pub_date'])
        feed.add_item(
            title=entry['title'],
            link=entry['link'],
            description=entry['content'],
            author_name=entry['author'],
            pubdate=entry['pub_date'],
            unique_id=entry['id'],
            categories=[entry['feed_title']],  # Use feed title as category for attribution
            updateddate=entry['pub_date'],
            # Extra keyword arguments are kept on the item dict but are not
            # serialized into the Atom output by Atom1Feed
            formatted_date=formatted_date
        )

    return feed

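# For reference, each add_item() call above becomes an Atom <entry> roughly
# like this (illustrative and trimmed, not verbatim feedgenerator output):
#
#   <entry>
#     <title>...</title>
#     <link href="..." rel="alternate"/>
#     <published>...</published>
#     <updated>...</updated>
#     <author><name>...</name></author>
#     <id>...</id>
#     <summary type="html">...</summary>
#     <category term="Feed Title"/>
#   </entry>
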
def main():
    # Load feed URLs
    feed_urls = load_feed_urls('feed.json')

    # Load mapping
    mapping = load_mapping('mapping.json')

    # Fetch feed data
    print(f"Fetching {len(feed_urls)} feeds...", file=sys.stderr)
    feeds = []
    for url in feed_urls:
        feed_data = get_feed_data(url, mapping)
        if feed_data:
            feeds.append(feed_data)

    # Extract and sort entries
    print("Processing entries...", file=sys.stderr)
    entries = extract_entries(feeds)
    print(f"Found {len(entries)} entries to include in feed", file=sys.stderr)

    # Create aggregated feed
    feed = create_atom_feed(entries)

    # Write to file with an explicit encoding so it matches the declared utf-8
    with open('eeg.xml', 'w', encoding='utf-8') as f:
        feed.write(f, 'utf-8')

    print("Feed successfully written to eeg.xml", file=sys.stderr)

if __name__ == "__main__":
    main()