···
+
"""Zulip bot for automatically posting thicket feed updates."""
+
from datetime import datetime
+
from pathlib import Path
+
from typing import Any, Dict, List, Optional, Set, Tuple
+
from zulip_bots.lib import BotHandler
+
# Handle imports for both direct execution and package import
+
from ..core.git_store import GitStore
+
from ..models import AtomEntry, ThicketConfig
+
from ..cli.commands.sync import sync_feed
+
# When run directly by zulip-bots, add the package to path
+
src_dir = Path(__file__).parent.parent.parent
+
if str(src_dir) not in sys.path:
+
sys.path.insert(0, str(src_dir))
+
from thicket.core.git_store import GitStore
+
from thicket.models import AtomEntry, ThicketConfig
+
from thicket.cli.commands.sync import sync_feed
+
class ThicketBotHandler:
+
"""Zulip bot that monitors thicket feeds and posts new articles."""
+
def __init__(self) -> None:
+
"""Initialize the thicket bot."""
+
self.logger = logging.getLogger(__name__)
+
self.git_store: Optional[GitStore] = None
+
self.config: Optional[ThicketConfig] = None
+
self.posted_entries: Set[str] = set()
+
# Bot configuration from storage
+
self.stream_name: Optional[str] = None
+
self.topic_name: Optional[str] = None
+
self.sync_interval: int = 300 # 5 minutes default
+
self.max_entries_per_sync: int = 10
+
self.config_path: Optional[Path] = None
+
# Debug mode configuration
+
self.debug_user: Optional[str] = None
+
self.debug_zulip_user_id: Optional[str] = None
+
def usage(self) -> str:
+
"""Return bot usage instructions."""
+
This bot automatically monitors thicket feeds and posts new articles.
+
- `@mention status` - Show current bot status and configuration
+
- `@mention sync now` - Force an immediate sync
+
- `@mention reset` - Clear posting history (will repost recent entries)
+
- `@mention config stream <stream_name>` - Set target stream
+
- `@mention config topic <topic_name>` - Set target topic
+
- `@mention config interval <seconds>` - Set sync interval
+
- `@mention help` - Show this help message
+
def initialize(self, bot_handler: BotHandler) -> None:
+
"""Initialize the bot with persistent storage."""
+
self.logger.info("Initializing ThicketBot")
+
# Get configuration from environment (set by CLI)
+
self.debug_user = os.getenv("THICKET_DEBUG_USER")
+
config_path_env = os.getenv("THICKET_CONFIG_PATH")
+
self.config_path = Path(config_path_env)
+
self.logger.info(f"Using thicket config: {self.config_path}")
+
# Load bot configuration from persistent storage
+
self._load_bot_config(bot_handler)
+
# Initialize thicket components
+
self._initialize_thicket()
+
self._load_posted_entries(bot_handler)
+
# Validate debug mode if enabled
+
self._validate_debug_mode(bot_handler)
+
self.logger.error(f"Failed to initialize thicket: {e}")
+
# Start background sync loop
+
self._schedule_sync(bot_handler)
+
def handle_message(self, message: Dict[str, Any], bot_handler: BotHandler) -> None:
+
"""Handle incoming Zulip messages."""
+
content = message["content"].strip()
+
sender = message["sender_full_name"]
+
# Only respond to mentions
+
if not self._is_mentioned(content, bot_handler):
+
cleaned_content = self._clean_mention(content, bot_handler)
+
command_parts = cleaned_content.split()
+
self._send_help(message, bot_handler)
+
command = command_parts[0].lower()
+
self._send_help(message, bot_handler)
+
elif command == "status":
+
self._send_status(message, bot_handler, sender)
+
elif command == "sync" and len(command_parts) > 1 and command_parts[1] == "now":
+
self._handle_force_sync(message, bot_handler, sender)
+
elif command == "reset":
+
self._handle_reset_command(message, bot_handler, sender)
+
elif command == "config":
+
self._handle_config_command(message, bot_handler, command_parts[1:], sender)
+
bot_handler.send_reply(message, f"Unknown command: {command}. Type `@mention help` for usage.")
+
self.logger.error(f"Error handling command '{command}': {e}")
+
bot_handler.send_reply(message, f"Error processing command: {str(e)}")
+
def _is_mentioned(self, content: str, bot_handler: BotHandler) -> bool:
+
"""Check if the bot is mentioned in the message."""
+
# Get bot's actual name from Zulip
+
bot_info = bot_handler._client.get_profile()
+
if bot_info.get('result') == 'success':
+
bot_name = bot_info.get('full_name', '').lower()
+
return f"@{bot_name}" in content.lower() or f"@**{bot_name}**" in content.lower()
+
self.logger.debug(f"Could not get bot profile: {e}")
+
# Fallback to generic check
+
return "@thicket" in content.lower()
+
def _clean_mention(self, content: str, bot_handler: BotHandler) -> str:
+
"""Remove bot mention from message content."""
+
# Get bot's actual name from Zulip
+
bot_info = bot_handler._client.get_profile()
+
if bot_info.get('result') == 'success':
+
bot_name = bot_info.get('full_name', '')
+
# Remove @bot_name or @**bot_name**
+
escaped_name = re.escape(bot_name)
+
content = re.sub(rf'@(?:\*\*)?{escaped_name}(?:\*\*)?', '', content, flags=re.IGNORECASE).strip()
+
self.logger.debug(f"Could not get bot profile for mention cleaning: {e}")
+
# Fallback to removing @thicket
+
content = re.sub(r'@(?:\*\*)?thicket(?:\*\*)?', '', content, flags=re.IGNORECASE).strip()
+
def _send_help(self, message: Dict[str, Any], bot_handler: BotHandler) -> None:
+
"""Send help message."""
+
bot_handler.send_reply(message, self.usage())
+
def _send_status(self, message: Dict[str, Any], bot_handler: BotHandler, sender: str) -> None:
+
"""Send bot status information."""
+
f"**Thicket Bot Status** (requested by {sender})",
+
f"🐛 **Debug Mode:** ENABLED",
+
f"🎯 **Debug User:** {self.debug_user}",
+
f"📍 **Stream:** {self.stream_name or 'Not configured'}",
+
f"📝 **Topic:** {self.topic_name or 'Not configured'}",
+
f"⏱️ **Sync Interval:** {self.sync_interval}s ({self.sync_interval // 60}m {self.sync_interval % 60}s)",
+
f"📊 **Max Entries/Sync:** {self.max_entries_per_sync}",
+
f"📁 **Config Path:** {self.config_path or 'Not configured'}",
+
f"📄 **Tracked Entries:** {len(self.posted_entries)}",
+
f"🔄 **Catchup Mode:** {'Active (first run)' if len(self.posted_entries) == 0 else 'Inactive'}",
+
f"✅ **Thicket Initialized:** {'Yes' if self.git_store else 'No'}",
+
bot_handler.send_reply(message, "\n".join(status_lines))
+
def _handle_force_sync(self, message: Dict[str, Any], bot_handler: BotHandler, sender: str) -> None:
+
"""Handle immediate sync request."""
+
if not self._check_initialization(message, bot_handler):
+
bot_handler.send_reply(message, f"🔄 Starting immediate sync... (requested by {sender})")
+
new_entries = self._perform_sync(bot_handler)
+
bot_handler.send_reply(
+
f"✅ Sync completed! Found {len(new_entries)} new entries."
+
self.logger.error(f"Force sync failed: {e}")
+
bot_handler.send_reply(message, f"❌ Sync failed: {str(e)}")
+
def _handle_reset_command(self, message: Dict[str, Any], bot_handler: BotHandler, sender: str) -> None:
+
"""Handle reset command to clear posted entries tracking."""
+
self.posted_entries.clear()
+
self._save_posted_entries(bot_handler)
+
bot_handler.send_reply(
+
f"✅ Posting history reset! Recent entries will be posted on next sync. (requested by {sender})"
+
self.logger.info(f"Posted entries tracking reset by {sender}")
+
self.logger.error(f"Reset failed: {e}")
+
bot_handler.send_reply(message, f"❌ Reset failed: {str(e)}")
+
def _handle_config_command(
+
message: Dict[str, Any],
+
bot_handler: BotHandler,
+
"""Handle configuration commands."""
+
bot_handler.send_reply(message, "Usage: `@mention config <setting> <value>`")
+
setting = args[0].lower()
+
value = " ".join(args[1:])
+
if setting == "stream":
+
self.stream_name = value
+
self._save_bot_config(bot_handler)
+
bot_handler.send_reply(message, f"✅ Stream set to: **{value}** (by {sender})")
+
elif setting == "topic":
+
self.topic_name = value
+
self._save_bot_config(bot_handler)
+
bot_handler.send_reply(message, f"✅ Topic set to: **{value}** (by {sender})")
+
elif setting == "interval":
+
bot_handler.send_reply(message, "❌ Interval must be at least 60 seconds")
+
self.sync_interval = interval
+
self._save_bot_config(bot_handler)
+
bot_handler.send_reply(message, f"✅ Sync interval set to: **{interval}s** (by {sender})")
+
bot_handler.send_reply(message, "❌ Invalid interval value. Must be a number of seconds.")
+
bot_handler.send_reply(
+
f"❌ Unknown setting: {setting}. Available: stream, topic, interval"
+
def _load_bot_config(self, bot_handler: BotHandler) -> None:
+
"""Load bot configuration from persistent storage."""
+
config_data = bot_handler.storage.get("bot_config")
+
config = json.loads(config_data)
+
self.stream_name = config.get("stream_name")
+
self.topic_name = config.get("topic_name")
+
self.sync_interval = config.get("sync_interval", 300)
+
self.max_entries_per_sync = config.get("max_entries_per_sync", 10)
+
# Bot config not found on first run is expected
+
def _save_bot_config(self, bot_handler: BotHandler) -> None:
+
"""Save bot configuration to persistent storage."""
+
"stream_name": self.stream_name,
+
"topic_name": self.topic_name,
+
"sync_interval": self.sync_interval,
+
"max_entries_per_sync": self.max_entries_per_sync,
+
bot_handler.storage.put("bot_config", json.dumps(config_data))
+
self.logger.error(f"Error saving bot config: {e}")
+
def _initialize_thicket(self) -> None:
+
"""Initialize thicket components."""
+
if not self.config_path or not self.config_path.exists():
+
raise ValueError("Thicket config file not found")
+
# Load thicket configuration
+
with open(self.config_path) as f:
+
config_data = yaml.safe_load(f)
+
self.config = ThicketConfig(**config_data)
+
self.git_store = GitStore(self.config.git_store)
+
self.logger.info("Thicket components initialized successfully")
+
def _validate_debug_mode(self, bot_handler: BotHandler) -> None:
+
"""Validate debug mode configuration."""
+
if not self.debug_user or not self.git_store:
+
# Get current Zulip server from environment
+
zulip_site_url = os.getenv("THICKET_ZULIP_SITE_URL", "")
+
server_url = zulip_site_url.replace("https://", "").replace("http://", "")
+
# Check if debug user exists in thicket
+
user = self.git_store.get_user(self.debug_user)
+
raise ValueError(f"Debug user '{self.debug_user}' not found in thicket")
+
# Check if user has Zulip association for this server
+
raise ValueError("Could not determine Zulip server URL")
+
zulip_user_id = user.get_zulip_mention(server_url)
+
raise ValueError(f"User '{self.debug_user}' has no Zulip association for server '{server_url}'")
+
# Try to look up the actual Zulip user ID from the email address
+
# But don't fail if we can't - we'll try again when sending messages
+
actual_user_id = self._lookup_zulip_user_id(bot_handler, zulip_user_id)
+
if actual_user_id and actual_user_id != zulip_user_id:
+
# Successfully resolved to numeric ID
+
self.debug_zulip_user_id = actual_user_id
+
self.logger.info(f"Debug mode enabled: Will send DMs to {self.debug_user} (email: {zulip_user_id}, user_id: {actual_user_id}) on {server_url}")
+
# Keep the email address, will resolve later when sending
+
self.debug_zulip_user_id = zulip_user_id
+
self.logger.info(f"Debug mode enabled: Will send DMs to {self.debug_user} ({zulip_user_id}) on {server_url} (will resolve user ID when sending)")
+
def _lookup_zulip_user_id(self, bot_handler: BotHandler, email_or_id: str) -> Optional[str]:
+
"""Look up Zulip user ID from email address or return the ID if it's already numeric."""
+
# If it's already a numeric user ID, return it
+
if email_or_id.isdigit():
+
client = bot_handler._client
+
self.logger.error("No Zulip client available for user lookup")
+
# First try the get_user_by_email API if available
+
user_result = client.get_user_by_email(email_or_id)
+
if user_result.get('result') == 'success':
+
user_data = user_result.get('user', {})
+
user_id = user_data.get('user_id')
+
self.logger.info(f"Found user ID {user_id} for '{email_or_id}' via get_user_by_email API")
+
except (AttributeError, Exception):
+
# Fallback: Get all users and search through them
+
users_result = client.get_users()
+
if users_result.get('result') == 'success':
+
for user in users_result['members']:
+
user_email = user.get('email', '')
+
delivery_email = user.get('delivery_email', '')
+
if (user_email == email_or_id or
+
delivery_email == email_or_id or
+
str(user.get('user_id')) == email_or_id):
+
user_id = user.get('user_id')
+
self.logger.error(f"No user found with identifier '{email_or_id}'. Searched {len(users_result['members'])} users.")
+
self.logger.error(f"Failed to get users: {users_result.get('msg', 'Unknown error')}")
+
self.logger.error(f"Error looking up user ID for '{email_or_id}': {e}")
+
def _lookup_zulip_user_info(self, bot_handler: BotHandler, email_or_id: str) -> Tuple[Optional[str], Optional[str]]:
+
"""Look up both Zulip user ID and full name from email address."""
+
if email_or_id.isdigit():
+
return email_or_id, None
+
client = bot_handler._client
+
# Try get_user_by_email API first
+
user_result = client.get_user_by_email(email_or_id)
+
if user_result.get('result') == 'success':
+
user_data = user_result.get('user', {})
+
user_id = user_data.get('user_id')
+
full_name = user_data.get('full_name', '')
+
return str(user_id), full_name
+
# Fallback: search all users
+
users_result = client.get_users()
+
if users_result.get('result') == 'success':
+
for user in users_result['members']:
+
if (user.get('email') == email_or_id or
+
user.get('delivery_email') == email_or_id):
+
return str(user.get('user_id')), user.get('full_name', '')
+
self.logger.error(f"Error looking up user info for '{email_or_id}': {e}")
+
def _load_posted_entries(self, bot_handler: BotHandler) -> None:
+
"""Load the set of already posted entries."""
+
posted_data = bot_handler.storage.get("posted_entries")
+
self.posted_entries = set(json.loads(posted_data))
+
# Empty set on first run is expected
+
self.posted_entries = set()
+
def _save_posted_entries(self, bot_handler: BotHandler) -> None:
+
"""Save the set of posted entries."""
+
bot_handler.storage.put("posted_entries", json.dumps(list(self.posted_entries)))
+
self.logger.error(f"Error saving posted entries: {e}")
+
def _check_initialization(self, message: Dict[str, Any], bot_handler: BotHandler) -> bool:
+
"""Check if thicket is properly initialized."""
+
if not self.git_store or not self.config:
+
bot_handler.send_reply(
+
"❌ Thicket not initialized. Please check configuration."
+
# In debug mode, we don't need stream/topic configuration
+
if not self.stream_name or not self.topic_name:
+
bot_handler.send_reply(
+
"❌ Stream and topic must be configured first. Use `@mention config stream <name>` and `@mention config topic <name>`"
+
def _schedule_sync(self, bot_handler: BotHandler) -> None:
+
"""Schedule periodic sync operations."""
+
can_sync = (self.git_store and
+
((self.stream_name and self.topic_name) or
+
self._perform_sync(bot_handler)
+
time.sleep(self.sync_interval)
+
self.logger.error(f"Error in sync loop: {e}")
+
time.sleep(60) # Wait before retrying
+
# Start background thread
+
sync_thread = threading.Thread(target=sync_loop, daemon=True)
+
def _perform_sync(self, bot_handler: BotHandler) -> List[AtomEntry]:
+
"""Perform thicket sync and return new entries."""
+
if not self.config or not self.git_store:
+
new_entries: List[Tuple[AtomEntry, str]] = [] # (entry, username) pairs
+
is_first_run = len(self.posted_entries) == 0
+
# Get all users and their feeds from git store
+
users_with_feeds = self.git_store.list_all_users_with_feeds()
+
# Sync each user's feeds
+
for username, feed_urls in users_with_feeds:
+
for feed_url in feed_urls:
+
# Run async sync function
+
loop = asyncio.new_event_loop()
+
asyncio.set_event_loop(loop)
+
new_count, _ = loop.run_until_complete(
+
sync_feed(self.git_store, username, str(feed_url), dry_run=False)
+
# Get the newly added entries
+
entries_to_check = self.git_store.list_entries(username, limit=new_count)
+
# Always check for catchup mode on first run
+
# Catchup mode: get last 5 entries on first run
+
catchup_entries = self.git_store.list_entries(username, limit=5)
+
entries_to_check = catchup_entries if not entries_to_check else entries_to_check
+
for entry in entries_to_check:
+
entry_key = f"{username}:{entry.id}"
+
if entry_key not in self.posted_entries:
+
new_entries.append((entry, username))
+
if len(new_entries) >= self.max_entries_per_sync:
+
self.logger.error(f"Error syncing feed {feed_url} for user {username}: {e}")
+
if len(new_entries) >= self.max_entries_per_sync:
+
# Post new entries to Zulip with rate limiting
+
for i, (entry, username) in enumerate(new_entries):
+
self._post_entry_to_zulip(entry, bot_handler, username)
+
self.posted_entries.add(f"{username}:{entry.id}")
+
# Rate limiting: pause after every 5 messages
+
if posted_count % 5 == 0 and i < len(new_entries) - 1:
+
self._save_posted_entries(bot_handler)
+
return [entry for entry, _ in new_entries]
+
def _post_entry_to_zulip(self, entry: AtomEntry, bot_handler: BotHandler, username: str) -> None:
+
"""Post a single entry to the configured Zulip stream/topic or debug user DM."""
+
# Get current Zulip server from environment
+
zulip_site_url = os.getenv("THICKET_ZULIP_SITE_URL", "")
+
server_url = zulip_site_url.replace("https://", "").replace("http://", "")
+
# Build author/date info consistently
+
if server_url and self.git_store:
+
user = self.git_store.get_user(username)
+
zulip_user_id = user.get_zulip_mention(server_url)
+
# Look up the actual Zulip full name for proper @mention
+
_, zulip_full_name = self._lookup_zulip_user_info(bot_handler, zulip_user_id)
+
display_name = zulip_full_name or user.display_name or username
+
# Check if author is different from the user - avoid redundancy
+
author_name = entry.author and entry.author.get("name")
+
if author_name and author_name.lower() != display_name.lower():
+
author_info = f" (by {author_name})"
+
published_info = f" • {entry.published.strftime('%Y-%m-%d')}"
+
mention_info = f"@**{display_name}** posted{author_info}{published_info}:\n\n"
+
# If no Zulip user found, use consistent format without @mention
+
user = self.git_store.get_user(username) if self.git_store else None
+
display_name = user.display_name if user else username
+
author_name = entry.author and entry.author.get("name")
+
if author_name and author_name.lower() != display_name.lower():
+
author_info = f" (by {author_name})"
+
published_info = f" • {entry.published.strftime('%Y-%m-%d')}"
+
mention_info = f"**{display_name}** posted{author_info}{published_info}:\n\n"
+
# Format the message with HTML processing
+
# Process HTML in summary and truncate if needed
+
processed_summary = self._process_html_content(entry.summary)
+
if len(processed_summary) > 400:
+
processed_summary = processed_summary[:397] + "..."
+
message_lines.append(f"\n{processed_summary}")
+
message_content = mention_info + "\n".join(message_lines)
+
# Choose destination based on mode
+
if self.debug_user and self.debug_zulip_user_id:
+
debug_message = f"🐛 **DEBUG:** New article from thicket user `{username}`:\n\n{message_content}"
+
# Ensure we have the numeric user ID
+
user_id_to_use = self.debug_zulip_user_id
+
if not user_id_to_use.isdigit():
+
# Need to look up the numeric ID
+
resolved_id = self._lookup_zulip_user_id(bot_handler, user_id_to_use)
+
user_id_to_use = resolved_id
+
self.logger.debug(f"Resolved {self.debug_zulip_user_id} to user ID {user_id_to_use}")
+
self.logger.error(f"Could not resolve user ID for {self.debug_zulip_user_id}")
+
# For private messages, user_id needs to be an integer, not string
+
user_id_int = int(user_id_to_use)
+
bot_handler.send_message({
+
"to": [user_id_int], # Use integer user ID
+
"content": debug_message
+
# If conversion to int fails, user_id_to_use might be an email
+
bot_handler.send_message({
+
"to": [user_id_to_use], # Try as string (email)
+
"content": debug_message
+
except Exception as e2:
+
self.logger.error(f"Failed to send DM to {self.debug_user} (tried both int and string): {e2}")
+
self.logger.error(f"Failed to send DM to {self.debug_user} ({user_id_to_use}): {e}")
+
self.logger.info(f"Posted entry to debug user {self.debug_user}: {entry.title}")
+
# Normal mode: send to stream/topic
+
bot_handler.send_message({
+
"to": self.stream_name,
+
"subject": self.topic_name,
+
"content": message_content
+
self.logger.info(f"Posted entry to stream: {entry.title} (user: {username})")
+
self.logger.error(f"Error posting entry to Zulip: {e}")
+
def _process_html_content(self, html_content: str) -> str:
+
"""Process HTML content from feeds to clean Zulip-compatible markdown."""
+
# Try to use markdownify for proper HTML to Markdown conversion
+
from markdownify import markdownify as md
+
# Convert HTML to Markdown with compact settings for summaries
+
heading_style="ATX", # Use # for headings (but we'll post-process these)
+
bullets="-", # Use - for bullets
+
convert=['a', 'b', 'strong', 'i', 'em', 'code', 'pre', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
+
# Post-process to convert headings to bold for compact summaries
+
# Convert markdown headers to bold with period
+
markdown = re.sub(r'^#{1,6}\s*(.+)$', r'**\1.**', markdown, flags=re.MULTILINE)
+
# Clean up excessive newlines and make more compact
+
markdown = re.sub(r'\n\s*\n\s*\n+', ' ', markdown) # Multiple newlines become space
+
markdown = re.sub(r'\n\s*\n', '. ', markdown) # Double newlines become sentence breaks
+
markdown = re.sub(r'\n', ' ', markdown) # Single newlines become spaces
+
# Clean up double periods and excessive whitespace
+
markdown = re.sub(r'\.\.+', '.', markdown)
+
markdown = re.sub(r'\s+', ' ', markdown)
+
return markdown.strip()
+
# Fallback: manual HTML processing
+
# Convert headings to bold with periods for compact summaries
+
content = re.sub(r'<h[1-6](?:\s[^>]*)?>([^<]*)</h[1-6]>', r'**\1.** ', content, flags=re.IGNORECASE)
+
# Convert common HTML elements to Markdown
+
content = re.sub(r'<(?:strong|b)(?:\s[^>]*)?>([^<]*)</(?:strong|b)>', r'**\1**', content, flags=re.IGNORECASE)
+
content = re.sub(r'<(?:em|i)(?:\s[^>]*)?>([^<]*)</(?:em|i)>', r'*\1*', content, flags=re.IGNORECASE)
+
content = re.sub(r'<code(?:\s[^>]*)?>([^<]*)</code>', r'`\1`', content, flags=re.IGNORECASE)
+
content = re.sub(r'<a(?:\s[^>]*?)?\s*href=["\']([^"\']*)["\'](?:\s[^>]*)?>([^<]*)</a>', r'[\2](\1)', content, flags=re.IGNORECASE)
+
# Convert block elements to spaces instead of newlines for compactness
+
content = re.sub(r'<br\s*/?>', ' ', content, flags=re.IGNORECASE)
+
content = re.sub(r'</p>\s*<p>', '. ', content, flags=re.IGNORECASE)
+
content = re.sub(r'</?(?:p|div)(?:\s[^>]*)?>', ' ', content, flags=re.IGNORECASE)
+
# Remove remaining HTML tags
+
content = re.sub(r'<[^>]+>', '', content)
+
# Clean up whitespace and make compact
+
content = re.sub(r'\s+', ' ', content) # Multiple whitespace becomes single space
+
content = re.sub(r'\.\.+', '.', content) # Multiple periods become single period
+
self.logger.error(f"Error processing HTML content: {e}")
+
# Last resort: just strip HTML tags
+
return re.sub(r'<[^>]+>', '', html_content).strip()
+
handler_class = ThicketBotHandler