Atom feed for our EEG site
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "feedparser",
#     "feedgenerator",
#     "requests",
# ]
# ///
# Do not delete the block above: it's needed for `uv run`.

import calendar
import datetime
import json
import os
import sys

import feedparser
import requests
from feedgenerator import Atom1Feed


def load_feed_urls(file_path):
    # feed.json is a JSON list of objects, each with a 'url' key.
    with open(file_path, 'r') as f:
        data = json.load(f)
    return [item['url'] for item in data]


def load_mapping(file_path):
    # mapping.json is optional: feed URL -> {'name': ..., 'site': ...}.
    if not os.path.exists(file_path):
        return {}

    with open(file_path, 'r') as f:
        return json.load(f)


def get_feed_data(url, mapping):
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        feed_data = feedparser.parse(response.content)
        print(f"Fetched {url}: found {len(feed_data.entries)} entries", file=sys.stderr)

        # Attach mapping info to feed_data for later attribution
        feed_data.mapping = mapping.get(url)

        return feed_data
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None


def extract_entries(feeds):
    all_entries = []
    for feed_data in feeds:
        if not feed_data or not hasattr(feed_data, 'entries'):
            continue

        # Get feed title and handle mapping
        feed_title = feed_data.feed.get('title', 'Unknown Source')
        author_name = 'Unknown'

        if getattr(feed_data, 'mapping', None):
            author_name = feed_data.mapping.get('name', 'Unknown')
            # Optionally use mapped site name
            # feed_title = feed_data.mapping.get('site', feed_title)

        print(f"Processing feed: {feed_title} ({len(feed_data.entries)} entries)", file=sys.stderr)

        for entry in feed_data.entries:
            # Get publication date. feedparser normalizes the *_parsed fields
            # to UTC, so convert with calendar.timegm (UTC) rather than
            # time.mktime (which assumes local time).
            pub_date = None
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.published_parsed), tz=datetime.timezone.utc)
            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                pub_date = datetime.datetime.fromtimestamp(
                    calendar.timegm(entry.updated_parsed), tz=datetime.timezone.utc)

            if not pub_date:
                print(f"Skipping entry without date: {entry.get('title', 'Unknown')}", file=sys.stderr)
                continue

            # Get title
            title = entry.get('title', 'No title')

            # Get link
            link = entry.get('link', '')

            # Get description/content, preferring full content over summary
            if hasattr(entry, 'content') and entry.content:
                content = entry.content[0].value
            else:
                content = entry.get('summary', '')

            # Get unique ID, falling back to the link
            entry_id = entry.get('id', link)

            all_entries.append({
                'title': title,
                'link': link,
                'content': content,
                'author': author_name,
                'pub_date': pub_date,
                'feed_title': feed_title,
                'id': entry_id
            })

    # Sort by publication date (newest first)
    sorted_entries = sorted(all_entries, key=lambda x: x['pub_date'], reverse=True)
    print(f"Total entries after sorting: {len(sorted_entries)}", file=sys.stderr)
    return sorted_entries


def format_pubdate(pubdate):
    # Format the date with a short (three-letter) month name
    return pubdate.strftime('%d %b %Y %H:%M:%S')


def create_atom_feed(entries):
    feed = Atom1Feed(
        title="Atomic EEG",
        link="https://example.com/",  # Placeholder link
        description="Aggregated Atom feeds",
        language="en",
        author_name="Feed Aggregator",
        feed_url="https://example.com/eeg.xml"  # Placeholder feed URL
    )

    for entry in entries:
        # Format the date with a short month name
        formatted_date = format_pubdate(entry['pub_date'])
        feed.add_item(
            title=entry['title'],
            link=entry['link'],
            description=entry['content'],
            author_name=entry['author'],
            pubdate=entry['pub_date'],
            unique_id=entry['id'],
            categories=[entry['feed_title']],  # Use feed title as category for attribution
            updateddate=entry['pub_date'],
            # Extra keyword arguments are stored on the item but not rendered
            # by Atom1Feed; the machine-readable dates above reach the XML.
            formatted_date=formatted_date
        )

    return feed


def main():
    # Load feed URLs
    feed_urls = load_feed_urls('feed.json')

    # Load mapping
    mapping = load_mapping('mapping.json')

    # Fetch feed data
    print(f"Fetching {len(feed_urls)} feeds...", file=sys.stderr)
    feeds = []
    for url in feed_urls:
        feed_data = get_feed_data(url, mapping)
        if feed_data:
            feeds.append(feed_data)

    # Extract and sort entries
    print("Processing entries...", file=sys.stderr)
    entries = extract_entries(feeds)
    print(f"Found {len(entries)} entries to include in feed", file=sys.stderr)

    # Create aggregated feed
    feed = create_atom_feed(entries)

    # Write to file
    with open('eeg.xml', 'w', encoding='utf-8') as f:
        feed.write(f, 'utf-8')

    print("Feed successfully written to eeg.xml", file=sys.stderr)


if __name__ == "__main__":
    main()
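
For reference, here is a minimal sketch of the two input files the script expects, inferred from load_feed_urls and load_mapping; the URLs, names, and site below are placeholders, not part of the original setup.

feed.json, a JSON list of objects each carrying a url key:

[
  {"url": "https://example.org/feed.atom"},
  {"url": "https://example.net/rss.xml"}
]

mapping.json, optional, keyed by feed URL and supplying author attribution:

{
  "https://example.org/feed.atom": {"name": "Alice", "site": "Alice's Blog"}
}

With both files in the working directory, the script runs under uv, which reads the inline metadata block at the top to fetch feedparser, feedgenerator, and requests before executing. Assuming the file is saved as eeg.py:

uv run eeg.py

The aggregated feed is written to eeg.xml next to the script.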