A community-based topic aggregation platform built on atproto
1"""
2Rich Text Formatter for Coves posts.
3
4Converts KagiStory objects to Coves rich text format with facets.
5Handles UTF-8 byte position calculation for multi-byte characters.
6"""
7import logging
8from typing import Dict, List, Tuple
9from src.models import KagiStory, Perspective, Source
10
11logger = logging.getLogger(__name__)
12
13
class RichTextFormatter:
    """
    Turns a KagiStory into Coves rich text (content string plus facets).

    Facets produced:
    - Bold for section headers and perspective actors
    - Italic for the quote
    - Link for every URL
    """

    def format_full(self, story: KagiStory) -> Dict:
        """
        Build the full rich text representation of a story.

        Sections are written in a fixed order: summary, highlights,
        perspectives, quote, sources, and finally the Kagi News attribution.
        The summary and the attribution are always written; each of the
        other sections is skipped when its story field is falsy.

        Args:
            story: KagiStory object to format

        Returns:
            Dictionary with 'content' (str) and 'facets' (list)
        """
        out = RichTextBuilder()

        # The summary always leads, followed by a blank line.
        out.add_text(story.summary)
        out.add_text("\n\n")

        # Optional sections; each helper is a no-op for a falsy field.
        self._emit_highlights(out, story.highlights)
        self._emit_perspectives(out, story.perspectives)
        self._emit_quote(out, story.quote)
        self._emit_sources(out, story.sources)

        # Closing attribution, linked to story.link.
        out.add_text("---\n📰 Story aggregated by ")
        out.add_link("Kagi News", story.link)

        return out.build()

    @staticmethod
    def _emit_highlights(out: "RichTextBuilder", highlights) -> None:
        """Write the bold "Highlights:" header and one bullet per highlight."""
        if not highlights:
            return
        out.add_bold("Highlights:")
        out.add_text("\n")
        for item in highlights:
            out.add_text(f"• {item}\n")
        out.add_text("\n")

    @staticmethod
    def _emit_perspectives(out: "RichTextBuilder", perspectives) -> None:
        """Write the bold "Perspectives:" header and one line per perspective."""
        if not perspectives:
            return
        out.add_bold("Perspectives:")
        out.add_text("\n")
        for view in perspectives:
            # Actor (with trailing colon) is bold; the source is a link
            # labelled "Source" wrapped in parentheses.
            out.add_bold(f"{view.actor}:")
            out.add_text(f" {view.description} (")
            out.add_link("Source", view.source_url)
            out.add_text(")\n")
        out.add_text("\n")

    @staticmethod
    def _emit_quote(out: "RichTextBuilder", quote) -> None:
        """Write the quote in italics (with double quotes) and its attribution."""
        if not quote:
            return
        out.add_italic(f'"{quote.text}"')
        out.add_text(f" — {quote.attribution}\n\n")

    @staticmethod
    def _emit_sources(out: "RichTextBuilder", sources) -> None:
        """Write the bold "Sources:" header and one linked bullet per source."""
        if not sources:
            return
        out.add_bold("Sources:")
        out.add_text("\n")
        for entry in sources:
            out.add_text("• ")
            out.add_link(entry.title, entry.url)
            out.add_text(f" - {entry.domain}\n")
        out.add_text("\n")
85
86
class RichTextBuilder:
    """
    Helper class to build rich text content with facets.

    Text is accumulated piece by piece. Every faceted piece records the
    UTF-8 byte range it occupies in the final content, so multi-byte
    characters are measured in bytes rather than characters.

    A running byte offset is maintained as text is appended, which keeps
    each append proportional to the size of the new piece instead of
    re-encoding everything written so far.

    NOTE: ``content_parts`` and ``facets`` are readable, but text must be
    appended through the ``add_*`` methods; the running byte offset is only
    updated there.
    """

    def __init__(self):
        self.content_parts: List[str] = []  # text pieces, in output order
        self.facets: List[Dict] = []  # facet dicts, in insertion order
        self._byte_length = 0  # UTF-8 size of all text appended so far

    def add_text(self, text: str) -> None:
        """
        Add plain text without any facets.

        Raises:
            TypeError: if ``text`` is not a ``str``.
        """
        self._append(text)

    def add_bold(self, text: str) -> None:
        """
        Add text with bold facet.

        Raises:
            TypeError: if ``text`` is not a ``str``.
        """
        self._add_faceted(text, {"$type": "social.coves.richtext.facet#bold"})

    def add_italic(self, text: str) -> None:
        """
        Add text with italic facet.

        Raises:
            TypeError: if ``text`` is not a ``str``.
        """
        self._add_faceted(text, {"$type": "social.coves.richtext.facet#italic"})

    def add_link(self, text: str, uri: str) -> None:
        """
        Add text with link facet pointing at ``uri``.

        Raises:
            TypeError: if ``text`` is not a ``str``.
        """
        self._add_faceted(
            text,
            {
                "$type": "social.coves.richtext.facet#link",
                "uri": uri,
            },
        )

    def _append(self, text: str) -> Tuple[int, int]:
        """
        Append ``text`` and return the (start, end) byte range it occupies.

        Raises:
            TypeError: if ``text`` is not a ``str``. Failing here gives a
                clear error at the offending call instead of a later
                failure when the parts are joined.
        """
        if not isinstance(text, str):
            raise TypeError(
                f"rich text must be str, got {type(text).__name__}"
            )
        # Encode before mutating state so a failed encode leaves the
        # builder unchanged.
        size = len(text.encode("utf-8"))
        start_byte = self._byte_length
        self.content_parts.append(text)
        self._byte_length = start_byte + size
        return start_byte, self._byte_length

    def _add_faceted(self, text: str, feature: Dict) -> None:
        """Append ``text`` and record one facet with ``feature`` over it."""
        start_byte, end_byte = self._append(text)
        if start_byte == end_byte:
            # Empty text: a zero-width range marks nothing, so no facet
            # is recorded.
            return
        self.facets.append({
            "index": {
                "byteStart": start_byte,
                "byteEnd": end_byte,
            },
            "features": [feature],
        })

    def _get_current_byte_position(self) -> int:
        """
        Get the current byte position in the content.

        This is the UTF-8 encoded size of all text appended so far through
        the ``add_*`` methods.
        """
        return self._byte_length

    def build(self) -> Dict:
        """
        Build the final rich text object.

        Returns:
            Dictionary with 'content' (the joined text) and 'facets'
            (the recorded facets sorted by start byte).
        """
        content = "".join(self.content_parts)

        # Sort facets by start position for consistency; sorted() is stable,
        # so facets with equal starts keep their insertion order.
        sorted_facets = sorted(
            self.facets, key=lambda f: f["index"]["byteStart"]
        )

        return {
            "content": content,
            "facets": sorted_facets,
        }
177 }