Atom feed for our EEG site
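
The script reads two small JSON files from the working directory. A minimal sketch of their expected shapes, with placeholder URLs and names:

feed.json (a list of objects, each with a "url" key):

    [
      {"url": "https://example.org/blog/atom.xml"},
      {"url": "https://example.net/feed.xml"}
    ]

mapping.json (optional; keys are feed URLs, values supply an author "name"):

    {
      "https://example.org/blog/atom.xml": {"name": "Alice"}
    }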
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "feedparser",
#     "feedgenerator",
#     "requests",
# ]
# ///
# Do not delete the metadata block above; it's needed for `uv run`.

import datetime
import json
import os
import sys

import feedparser
import requests
from feedgenerator import Atom1Feed


def load_feed_urls(file_path):
    """Read the list of feed URLs from a JSON file of {"url": ...} objects."""
    with open(file_path, 'r') as f:
        data = json.load(f)
    return [item['url'] for item in data]


def load_mapping(file_path):
    """Load the optional URL-to-author mapping; a missing file means no mapping."""
    if not os.path.exists(file_path):
        return {}

    with open(file_path, 'r') as f:
        return json.load(f)


def get_feed_data(url, mapping):
    """Fetch and parse one feed, attaching its mapping entry (or None)."""
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        feed_data = feedparser.parse(response.content)
        print(f"Fetched {url}: found {len(feed_data.entries)} entries", file=sys.stderr)

        # Attach mapping info to feed_data for use in extract_entries()
        feed_data.mapping = mapping.get(url)

        return feed_data
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None


def extract_entries(feeds):
    """Flatten all feeds into a single list of entries, newest first."""
    all_entries = []
    for feed_data in feeds:
        if not feed_data or not hasattr(feed_data, 'entries'):
            continue

        # Get the feed title and resolve the author name from the mapping
        feed_title = feed_data.feed.get('title', 'Unknown Source')
        author_name = 'Unknown'
        if getattr(feed_data, 'mapping', None):
            author_name = feed_data.mapping.get('name', 'Unknown')

        print(f"Processing feed: {feed_title} ({len(feed_data.entries)} entries)", file=sys.stderr)

        for entry in feed_data.entries:
            # Get the publication date. feedparser's *_parsed fields are UTC
            # struct_times, so build an aware UTC datetime directly instead of
            # passing them through time.mktime(), which assumes local time.
            pub_date = None
            if getattr(entry, 'published_parsed', None):
                pub_date = datetime.datetime(*entry.published_parsed[:6], tzinfo=datetime.timezone.utc)
            elif getattr(entry, 'updated_parsed', None):
                pub_date = datetime.datetime(*entry.updated_parsed[:6], tzinfo=datetime.timezone.utc)

            if not pub_date:
                print(f"Skipping entry without date: {entry.get('title', 'Unknown')}", file=sys.stderr)
                continue

            link = entry.get('link', '')

            # Prefer full content; fall back to the summary
            if hasattr(entry, 'content') and entry.content:
                content = entry.content[0].value
            else:
                content = entry.get('summary', '')

            all_entries.append({
                'title': entry.get('title', 'No title'),
                'link': link,
                'content': content,
                'author': author_name,
                'pub_date': pub_date,
                'feed_title': feed_title,
                'id': entry.get('id', link),  # fall back to the link as a unique ID
            })

    # Sort by publication date (newest first)
    sorted_entries = sorted(all_entries, key=lambda x: x['pub_date'], reverse=True)
    print(f"Total entries after sorting: {len(sorted_entries)}", file=sys.stderr)
    return sorted_entries


def format_pubdate(pubdate):
    """Format the date with a short (three-letter) month name."""
    return pubdate.strftime('%d %b %Y %H:%M:%S')


def create_atom_feed(entries):
    feed = Atom1Feed(
        title="Atomic EEG",
        link="https://example.com/",  # placeholder link
        description="Aggregated Atom feeds",
        language="en",
        author_name="Feed Aggregator",
        feed_url="https://example.com/eeg.xml",  # placeholder feed URL
    )

    for entry in entries:
        feed.add_item(
            title=entry['title'],
            link=entry['link'],
            description=entry['content'],
            author_name=entry['author'],
            pubdate=entry['pub_date'],
            unique_id=entry['id'],
            categories=[entry['feed_title']],  # use the feed title as a category for attribution
            updateddate=entry['pub_date'],
            # Extra keyword arguments are stored on the item but not serialized
            # by Atom1Feed; the short-month date is kept for downstream use.
            formatted_date=format_pubdate(entry['pub_date']),
        )

    return feed


def main():
    # Load feed URLs
    feed_urls = load_feed_urls('feed.json')

    # Load the optional author mapping
    mapping = load_mapping('mapping.json')

    # Fetch feed data
    print(f"Fetching {len(feed_urls)} feeds...", file=sys.stderr)
    feeds = []
    for url in feed_urls:
        feed_data = get_feed_data(url, mapping)
        if feed_data:
            feeds.append(feed_data)

    # Extract and sort entries
    print("Processing entries...", file=sys.stderr)
    entries = extract_entries(feeds)
    print(f"Found {len(entries)} entries to include in feed", file=sys.stderr)

    # Create the aggregated feed
    feed = create_atom_feed(entries)

    # Write to file
    with open('eeg.xml', 'w', encoding='utf-8') as f:
        feed.write(f, 'utf-8')

    print("Feed successfully written to eeg.xml", file=sys.stderr)


if __name__ == "__main__":
    main()
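
To run it with uv, which reads the inline script metadata at the top of the file (the file name here is just an assumption):

    uv run aggregate.py

The aggregated feed is written to eeg.xml in the working directory; all progress and error messages go to stderr.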