// A React component library for rendering common AT Protocol records for applications such as Bluesky and Leaflet.
1import type { AppBskyRichtextFacet } from "@atcute/bluesky";
2
/**
 * One contiguous run of text produced by `createTextSegments`.
 */
export interface TextSegment {
  /** The substring of the source text covered by this segment. */
  text: string;
  /** The facet attached to this run; absent for plain, un-annotated text. */
  facet?: AppBskyRichtextFacet.Main;
}
7
/**
 * Splits `text` into an ordered list of segments for rendering, attaching
 * the matching facet to every span that a facet covers.
 *
 * Facet ranges are UTF-8 byte offsets (`index.byteStart` inclusive,
 * `index.byteEnd` exclusive). They are translated to code-point indices
 * before slicing, so every position tracked inside this function counts
 * code points, not UTF-16 code units.
 *
 * Facets are processed in ascending `byteStart` order. A facet is dropped
 * when its range is empty or inverted, or when it starts before the end of
 * a facet that has already been emitted (overlap); the text it covered is
 * then emitted as plain text, so no part of `text` is duplicated or lost.
 *
 * @param text - The full record text.
 * @param facets - Optional facets annotating byte ranges of `text`.
 * @returns The segments in document order. Concatenating their `text`
 *   fields reproduces `text`. When there are no usable facets the result is
 *   a single plain segment.
 */
export function createTextSegments(
  text: string,
  facets?: AppBskyRichtextFacet.Main[],
): TextSegment[] {
  if (!facets || facets.length === 0) {
    return [{ text }];
  }

  // bytePrefix[i] is the byte offset at which code point i starts; the last
  // entry is the total byte length, so the table has one more entry than
  // there are code points.
  const bytePrefix = buildBytePrefix(text);
  const totalChars = bytePrefix.length - 1;

  // Sort a copy so the caller's array is left untouched.
  const sortedFacets = [...facets].sort(
    (a, b) => a.index.byteStart - b.index.byteStart,
  );

  const segments: TextSegment[] = [];
  let currentPos = 0;

  for (const facet of sortedFacets) {
    const startChar = byteOffsetToCharIndex(bytePrefix, facet.index.byteStart);
    const endChar = byteOffsetToCharIndex(bytePrefix, facet.index.byteEnd);

    // Skip facets that cover nothing, and facets that overlap text already
    // emitted: rendering them would produce empty or duplicated output.
    if (endChar <= startChar || startChar < currentPos) {
      continue;
    }

    // Plain text between the previous facet and this one.
    if (startChar > currentPos) {
      segments.push({
        text: sliceByCharRange(text, currentPos, startChar),
      });
    }

    // The faceted text itself.
    segments.push({
      text: sliceByCharRange(text, startChar, endChar),
      facet,
    });

    currentPos = endChar;
  }

  // Remaining plain text. Compare against the code-point count rather than
  // text.length: the latter counts UTF-16 units and overshoots whenever the
  // text contains astral characters, which used to append an empty segment.
  if (currentPos < totalChars) {
    segments.push({
      text: sliceByCharRange(text, currentPos, totalChars),
    });
  }

  // Only reachable with empty text; mirror the no-facet result shape.
  return segments.length > 0 ? segments : [{ text }];
}
60
/**
 * Produces the cumulative UTF-8 byte offsets of `text`, one entry per code
 * point boundary.
 *
 * Entry `i` is the number of bytes occupied by the first `i` code points,
 * so the array starts with `0` and ends with the total encoded length.
 * A surrogate pair counts as one code point.
 */
function buildBytePrefix(text: string): number[] {
  const utf8 = new TextEncoder();
  const offsets: number[] = [0];
  let runningBytes = 0;

  // String iteration yields whole code points, so surrogate pairs arrive
  // as a single two-unit string and need no manual index stepping.
  for (const symbol of text) {
    runningBytes += utf8.encode(symbol).length;
    offsets.push(runningBytes);
  }

  return offsets;
}
85
/**
 * Maps a byte offset onto an index into the byte prefix array.
 *
 * - An offset equal to an entry yields that entry's index.
 * - An offset that falls before the first larger entry yields the index
 *   just below that entry, clamped so it never goes under 0.
 * - An offset larger than every entry yields the last index.
 */
function byteOffsetToCharIndex(prefix: number[], byteOffset: number): number {
  const firstAtOrPast = prefix.findIndex((boundary) => boundary >= byteOffset);

  // Nothing reaches the offset: clamp to the final entry.
  if (firstAtOrPast === -1) {
    return prefix.length - 1;
  }

  // Exact boundary hit.
  if (prefix[firstAtOrPast] === byteOffset) {
    return firstAtOrPast;
  }

  // Offset lies strictly before this entry: step back one, never below 0.
  return Math.max(0, firstAtOrPast - 1);
}
96
/**
 * Returns the code points of `text` whose index lies in `[start, end)`.
 *
 * Indices count code points, so a surrogate pair occupies one position.
 * When the range covers the whole string (`start <= 0` and `end` at least
 * the UTF-16 length) the input is returned unchanged.
 */
function sliceByCharRange(text: string, start: number, end: number): string {
  const coversEverything = start <= 0 && end >= text.length;
  if (coversEverything) {
    return text;
  }

  const picked: string[] = [];
  let position = 0;

  for (const symbol of text) {
    // Stop once the end of the range is reached.
    if (!(position < end)) {
      break;
    }
    if (position >= start) {
      picked.push(symbol);
    }
    position += 1;
  }

  return picked.join("");
}
120}