Atom feed for our EEG site
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "feedparser",
#     "feedgenerator",
#     "requests",
# ]
# ///
# Do not delete the block above: it's needed for `uv run`

import calendar
import datetime
import json
import os
import sys

import feedparser
import requests
from feedgenerator import Atom1Feed


def load_feed_urls(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    return [item['url'] for item in data]


def load_mapping(file_path):
    if not os.path.exists(file_path):
        return {}

    with open(file_path, 'r') as f:
        return json.load(f)


def get_feed_data(url, mapping):
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        feed_data = feedparser.parse(response.content)
        print(f"Fetched {url}: found {len(feed_data.entries)} entries", file=sys.stderr)

        # Attach this feed's mapping info (or None) to the parsed result
        feed_data.mapping = mapping.get(url)

        return feed_data
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None


def extract_entries(feeds):
    all_entries = []
    for feed_data in feeds:
        if not feed_data or not hasattr(feed_data, 'entries'):
            continue

        # Get the feed title and apply any mapping
        feed_title = feed_data.feed.get('title', 'Unknown Source')
        author_name = 'Unknown'

        if hasattr(feed_data, 'mapping') and feed_data.mapping:
            author_name = feed_data.mapping.get('name', 'Unknown')
            # Optionally use the mapped site name instead:
            # feed_title = feed_data.mapping.get('site', feed_title)

        print(f"Processing feed: {feed_title} ({len(feed_data.entries)} entries)", file=sys.stderr)

        for entry in feed_data.entries:
            # Get the publication date, falling back to the updated date.
            # feedparser normalizes *_parsed values to UTC struct_time, so
            # convert with calendar.timegm; time.mktime would wrongly treat
            # them as local time and skew the sort order.
            pub_date = None
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.published_parsed), tz=datetime.timezone.utc)
            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.updated_parsed), tz=datetime.timezone.utc)

            if not pub_date:
                print(f"Skipping entry without date: {entry.get('title', 'Unknown')}", file=sys.stderr)
                continue

            title = entry.get('title', 'No title')
            link = entry.get('link', '')

            # Prefer full content; fall back to the summary
            if hasattr(entry, 'content') and entry.content:
                content = entry.content[0].value
            else:
                content = entry.get('summary', '')

            # Use the entry's own id if present, otherwise the link
            entry_id = entry.get('id', link)

            all_entries.append({
                'title': title,
                'link': link,
                'content': content,
                'author': author_name,
                'pub_date': pub_date,
                'feed_title': feed_title,
                'id': entry_id,
            })

    # Sort by publication date (newest first)
    sorted_entries = sorted(all_entries, key=lambda x: x['pub_date'], reverse=True)
    print(f"Total entries after sorting: {len(sorted_entries)}", file=sys.stderr)
    return sorted_entries


def create_atom_feed(entries):
    feed = Atom1Feed(
        title="Atomic EEG",
        link="https://example.com/",  # Placeholder link
        description="Aggregated Atom feeds",
        language="en",
        author_name="Feed Aggregator",
        feed_url="https://example.com/eeg.xml",  # Placeholder feed URL
    )

    for entry in entries:
        feed.add_item(
            title=entry['title'],
            link=entry['link'],
            description=entry['content'],
            author_name=entry['author'],
            pubdate=entry['pub_date'],
            unique_id=entry['id'],
            categories=[entry['feed_title']],  # Use the feed title as a category for attribution
        )

    return feed


def main():
    # Load the feed URLs
    feed_urls = load_feed_urls('feed.json')

    # Load the per-URL author/site mapping
    mapping = load_mapping('mapping.json')

    # Fetch the feeds
    print(f"Fetching {len(feed_urls)} feeds...", file=sys.stderr)
    feeds = []
    for url in feed_urls:
        feed_data = get_feed_data(url, mapping)
        if feed_data:
            feeds.append(feed_data)

    # Extract and sort the entries
    print("Processing entries...", file=sys.stderr)
    entries = extract_entries(feeds)
    print(f"Found {len(entries)} entries to include in feed", file=sys.stderr)

    # Create the aggregated feed
    feed = create_atom_feed(entries)

    # Write it out
    with open('eeg.xml', 'w', encoding='utf-8') as f:
        feed.write(f, 'utf-8')

    print("Feed successfully written to eeg.xml", file=sys.stderr)


if __name__ == "__main__":
    main()
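The script expects two JSON files next to it. A minimal sketch of their shapes, inferred from load_feed_urls() and the mapping lookups above (all URLs and names here are placeholders, not from the original):

feed.json is a list of objects, each with a url key:

[
    {"url": "https://example.org/atom.xml"},
    {"url": "https://example.net/feed.xml"}
]

mapping.json is keyed by feed URL; name supplies the entry author, and the optional site can override the feed title via the commented-out line in extract_entries():

{
    "https://example.org/atom.xml": {"name": "Alice", "site": "Alice's Blog"}
}

With those in place, the inline metadata block lets uv resolve the dependencies, so something like uv run eeg_feed.py (filename hypothetical) fetches everything and writes the aggregated feed to eeg.xml.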