A community-based topic aggregation platform built on atproto
1"""
2Rich Text Formatter for Coves posts.
3
4Converts KagiStory objects to Coves rich text format with facets.
5Handles UTF-8 byte position calculation for multi-byte characters.
6"""
7import logging
8from typing import Dict, List, Tuple
9from src.models import KagiStory, Perspective, Source
10
11logger = logging.getLogger(__name__)
12
13
class RichTextFormatter:
    """
    Renders a KagiStory as Coves rich text (content string plus facets).

    Facets emitted:
    - Bold: the section headers and each perspective's actor label
    - Italic: the quoted text
    - Link: perspective sources, source titles and the Kagi News attribution
    """

    def format_full(self, story: KagiStory) -> Dict:
        """
        Produce the complete rich text rendering of a story.

        Sections are written in a fixed order: summary, highlights,
        perspectives, quote, sources, and finally the Kagi News
        attribution line. Optional sections are omitted when the
        corresponding story attribute is falsy.

        Args:
            story: KagiStory object to format

        Returns:
            Dictionary with 'content' (str) and 'facets' (list)
        """
        out = RichTextBuilder()

        # The summary is always written, followed by a blank line.
        out.add_text(story.summary)
        out.add_text("\n\n")

        self._write_highlights(out, story)
        self._write_perspectives(out, story)
        self._write_quote(out, story)
        self._write_sources(out, story)

        # Closing attribution, linking back to the story on Kagi News.
        out.add_text("---\n📰 Story aggregated by ")
        out.add_link("Kagi News", story.link)

        return out.build()

    def _write_highlights(self, out: "RichTextBuilder", story: KagiStory) -> None:
        """Append the bulleted "Highlights:" section, if the story has any."""
        if not story.highlights:
            return

        out.add_bold("Highlights:")
        out.add_text("\n")
        for item in story.highlights:
            out.add_text(f"• {item}\n\n")
        out.add_text("\n")

    def _write_perspectives(self, out: "RichTextBuilder", story: KagiStory) -> None:
        """Append the "Perspectives:" section, if the story has any."""
        if not story.perspectives:
            return

        out.add_bold("Perspectives:")
        out.add_text("\n")
        for view in story.perspectives:
            # Actor label (including its trailing colon) is bold.
            out.add_bold(f"{view.actor}:")
            out.add_text(f" {view.description}")

            # Parenthesised source link, only when a URL is present.
            if view.source_url:
                out.add_text(" (")
                # Fall back to a generic label when the source has no name.
                label = view.source_name or "Source"
                out.add_link(label, view.source_url)
                out.add_text(")")

            out.add_text("\n\n")
        out.add_text("\n")

    def _write_quote(self, out: "RichTextBuilder", story: KagiStory) -> None:
        """Append the italicised quote and its attribution, if present."""
        if not story.quote:
            return

        out.add_italic(f'"{story.quote.text}"')
        out.add_text(f" — {story.quote.attribution}\n\n")

    def _write_sources(self, out: "RichTextBuilder", story: KagiStory) -> None:
        """Append the bulleted "Sources:" section, if the story has any."""
        if not story.sources:
            return

        out.add_bold("Sources:")
        out.add_text("\n")
        for src in story.sources:
            out.add_text("• ")
            out.add_link(src.title, src.url)
            out.add_text(f" - {src.domain}\n\n")
        out.add_text("\n")
89
90
class RichTextBuilder:
    """
    Helper class to build rich text content with facets.

    Text is accumulated piece by piece through the ``add_*`` methods.
    Each facet records the UTF-8 *byte* range (not character range) of
    the text it decorates, so multi-byte characters are measured
    correctly.

    The byte offset is maintained incrementally: every content part is
    encoded exactly once, instead of re-joining and re-encoding the
    whole content for every facet.
    """

    def __init__(self):
        self.content_parts: List[str] = []
        self.facets: List[Dict] = []
        # Cached measurement: total UTF-8 byte length of the first
        # `_measured_count` entries of `content_parts`.
        self._byte_length = 0
        self._measured_count = 0

    def add_text(self, text: str):
        """Add plain text without any facets."""
        self.content_parts.append(text)

    def add_bold(self, text: str):
        """Add text with bold facet."""
        self._add_faceted(text, {"$type": "social.coves.richtext.facet#bold"})

    def add_italic(self, text: str):
        """Add text with italic facet."""
        self._add_faceted(text, {"$type": "social.coves.richtext.facet#italic"})

    def add_link(self, text: str, uri: str):
        """Add text with link facet pointing at ``uri``."""
        self._add_faceted(
            text,
            {
                "$type": "social.coves.richtext.facet#link",
                "uri": uri,
            },
        )

    def _add_faceted(self, text: str, feature: Dict) -> None:
        """Append ``text`` and record one facet covering exactly its bytes."""
        start_byte = self._get_current_byte_position()
        self.content_parts.append(text)
        end_byte = self._get_current_byte_position()

        self.facets.append({
            "index": {
                "byteStart": start_byte,
                "byteEnd": end_byte,
            },
            "features": [feature],
        })

    def _get_current_byte_position(self) -> int:
        """
        Get the current byte position in the content.

        Uses UTF-8 encoding to handle multi-byte characters correctly.
        Only parts appended since the previous call are encoded, so the
        total measuring work is linear in the content size. Parts
        appended directly to ``content_parts`` are picked up as well.

        Raises:
            UnicodeEncodeError: if a pending part cannot be encoded as
                UTF-8 (e.g. it contains a lone surrogate).
        """
        parts = self.content_parts

        # If parts were removed behind our back the cache is stale;
        # start measuring from scratch.
        if self._measured_count > len(parts):
            self._byte_length = 0
            self._measured_count = 0

        pending = parts[self._measured_count:]
        if pending:
            # Compute first, commit after, so a failed encode leaves the
            # cached state untouched.
            added = sum(len(part.encode('utf-8')) for part in pending)
            self._byte_length += added
            self._measured_count += len(pending)

        return self._byte_length

    def build(self) -> Dict:
        """
        Build the final rich text object.

        Returns:
            Dictionary with 'content' (the concatenated text) and
            'facets' (the recorded facets, ordered by start byte)
        """
        content = ''.join(self.content_parts)

        # Sort facets by start position for consistency
        sorted_facets = sorted(self.facets, key=lambda f: f['index']['byteStart'])

        return {
            "content": content,
            "facets": sorted_facets,
        }
181 }