A community-based topic aggregation platform built on atproto
at main 5.5 kB view raw
"""
Rich Text Formatter for Coves posts.

Converts KagiStory objects to Coves rich text format with facets.
Handles UTF-8 byte position calculation for multi-byte characters.
"""
import logging
from typing import Dict, List, Tuple
from src.models import KagiStory, Perspective, Source

logger = logging.getLogger(__name__)


class RichTextFormatter:
    """
    Formats KagiStory into Coves rich text with facets.

    Applies:
    - Bold facets for section headers and perspective actors
    - Italic facets for quotes
    - Link facets for all URLs
    """

    def format_full(self, story: KagiStory) -> Dict:
        """
        Format KagiStory into full rich text format.

        Sections are emitted in a fixed order: summary, highlights,
        perspectives, quote, sources, and finally the Kagi News attribution
        line. Every section except the summary and the attribution is
        skipped when the corresponding story attribute is falsy.

        Args:
            story: KagiStory object to format

        Returns:
            Dictionary with 'content' (str) and 'facets' (list)
        """
        builder = RichTextBuilder()

        # Summary
        builder.add_text(story.summary)
        builder.add_text("\n\n")

        if story.highlights:
            self._add_highlights(builder, story.highlights)

        if story.perspectives:
            self._add_perspectives(builder, story.perspectives)

        if story.quote:
            self._add_quote(builder, story.quote)

        if story.sources:
            self._add_sources(builder, story.sources)

        # Kagi News attribution
        builder.add_text("---\n📰 Story aggregated by ")
        builder.add_link("Kagi News", story.link)

        return builder.build()

    @staticmethod
    def _add_highlights(builder: "RichTextBuilder", highlights) -> None:
        """Append the bold "Highlights:" header followed by one paragraph per highlight."""
        builder.add_bold("Highlights:")
        builder.add_text("\n")
        for highlight in highlights:
            builder.add_text(f"{highlight}\n\n")
        builder.add_text("\n")

    @staticmethod
    def _add_perspectives(builder: "RichTextBuilder", perspectives) -> None:
        """Append the "Perspectives:" section with bold actors and optional source links."""
        builder.add_bold("Perspectives:")
        builder.add_text("\n")
        for perspective in perspectives:
            # Bold the actor name (including the trailing colon)
            builder.add_bold(f"{perspective.actor}:")
            builder.add_text(f" {perspective.description}")

            # Add a parenthesised link to the source if a URL is available;
            # fall back to a generic label when the source has no name.
            if perspective.source_url:
                builder.add_text(" (")
                builder.add_link(
                    perspective.source_name or "Source",
                    perspective.source_url,
                )
                builder.add_text(")")

            builder.add_text("\n\n")
        builder.add_text("\n")

    @staticmethod
    def _add_quote(builder: "RichTextBuilder", quote) -> None:
        """Append the quote in italics (wrapped in double quotes) and its attribution."""
        builder.add_italic(f'"{quote.text}"')
        # NOTE(review): no separator is inserted between the closing quote
        # mark and the attribution; presumably the attribution text carries
        # its own leading separator - confirm against the data source.
        builder.add_text(f"{quote.attribution}\n\n")

    @staticmethod
    def _add_sources(builder: "RichTextBuilder", sources) -> None:
        """Append the "Sources:" section, one linked title plus domain per source."""
        builder.add_bold("Sources:")
        builder.add_text("\n")
        for source in sources:
            builder.add_link(source.title, source.url)
            builder.add_text(f" - {source.domain}\n\n")
        builder.add_text("\n")


class RichTextBuilder:
    """
    Helper class to build rich text content with facets.

    Handles UTF-8 byte position tracking automatically: a running byte
    count is updated on every append, so facet offsets are computed without
    re-encoding the content accumulated so far.
    """

    def __init__(self):
        self.content_parts: List[str] = []
        self.facets: List[Dict] = []
        # UTF-8 byte length of everything appended so far.
        self._byte_length = 0

    def add_text(self, text: str) -> None:
        """Add plain text without any facets."""
        self._append(text)

    def add_bold(self, text: str) -> None:
        """Add text with bold facet."""
        self._add_faceted(text, {"$type": "social.coves.richtext.facet#bold"})

    def add_italic(self, text: str) -> None:
        """Add text with italic facet."""
        self._add_faceted(text, {"$type": "social.coves.richtext.facet#italic"})

    def add_link(self, text: str, uri: str) -> None:
        """Add text with link facet pointing at ``uri``."""
        self._add_faceted(
            text,
            {
                "$type": "social.coves.richtext.facet#link",
                "uri": uri,
            },
        )

    def _append(self, text: str) -> Tuple[int, int]:
        """
        Append ``text`` to the content and advance the byte counter.

        Returns:
            ``(start, end)`` UTF-8 byte offsets that ``text`` occupies in the
            final content.
        """
        # Encode before mutating any state so a failed encode leaves the
        # builder untouched.
        encoded_length = len(text.encode('utf-8'))
        start_byte = self._byte_length
        self.content_parts.append(text)
        self._byte_length = start_byte + encoded_length
        return start_byte, self._byte_length

    def _add_faceted(self, text: str, feature: Dict) -> None:
        """
        Append ``text`` and record a facet with a single ``feature`` over it.

        Empty text is still appended (a no-op for the content) but does not
        produce a facet, since a zero-length byte range marks up nothing.
        """
        start_byte, end_byte = self._append(text)
        if end_byte == start_byte:
            return

        self.facets.append({
            "index": {
                "byteStart": start_byte,
                "byteEnd": end_byte
            },
            "features": [feature]
        })

    def _get_current_byte_position(self) -> int:
        """
        Get the current byte position in the content.

        The position is the UTF-8 encoded length of everything appended so
        far, so multi-byte characters are counted by their byte width.
        """
        return self._byte_length

    def build(self) -> Dict:
        """
        Build the final rich text object.

        Returns:
            Dictionary with 'content' (the concatenated text) and 'facets'
            (the recorded facets, sorted by start byte)
        """
        content = ''.join(self.content_parts)

        # Sort facets by start position for consistency
        sorted_facets = sorted(self.facets, key=lambda f: f['index']['byteStart'])

        return {
            "content": content,
            "facets": sorted_facets
        }