A React component library for rendering common AT Protocol records for applications such as Bluesky and Leaflet.
at main 2.9 kB view raw
import type { AppBskyRichtextFacet } from "@atcute/bluesky";

/**
 * A contiguous run of text, optionally annotated with the facet that
 * covers it. Segments without a `facet` property are plain text.
 */
export interface TextSegment {
  text: string;
  facet?: AppBskyRichtextFacet.Main;
}

/**
 * Converts a text string with facets into segments that can be rendered
 * with appropriate styling and interactivity.
 *
 * Facet indices are byte offsets into the UTF-8 encoding of `text`, while
 * JavaScript strings are indexed by UTF-16 code units, so all offsets are
 * translated to code point positions before slicing.
 *
 * Facets are processed in ascending `byteStart` order. A facet is ignored
 * when it overlaps text that an earlier facet already consumed, or when
 * its range is empty or inverted; this keeps every character of `text` in
 * exactly one segment, so concatenating the segment texts reproduces
 * `text`.
 *
 * @param text - The full record text.
 * @param facets - Facets annotating byte ranges of `text`; not mutated.
 * @returns The segments in document order. The array is never empty.
 */
export function createTextSegments(
  text: string,
  facets?: AppBskyRichtextFacet.Main[],
): TextSegment[] {
  if (!facets || facets.length === 0) {
    return [{ text }];
  }

  // Split once into code points so every later slice is a cheap array
  // slice instead of a rescan of the string from the beginning.
  const codePoints = Array.from(text);
  const bytePrefix = buildBytePrefix(codePoints);

  // Sort a copy so the caller's array is left untouched.
  const sortedFacets = [...facets].sort(
    (a, b) => a.index.byteStart - b.index.byteStart,
  );

  const segments: TextSegment[] = [];
  let currentPos = 0; // code point index of the first unconsumed character

  for (const facet of sortedFacets) {
    const startChar = byteOffsetToCharIndex(bytePrefix, facet.index.byteStart);
    const endChar = byteOffsetToCharIndex(bytePrefix, facet.index.byteEnd);

    // Discard overlapping, empty, or inverted facets: emitting them would
    // duplicate text or move the cursor backwards.
    if (startChar < currentPos || endChar <= startChar) {
      continue;
    }

    // Plain text between the previous facet and this one.
    if (startChar > currentPos) {
      segments.push({
        text: sliceByCharRange(codePoints, currentPos, startChar),
      });
    }

    segments.push({
      text: sliceByCharRange(codePoints, startChar, endChar),
      facet,
    });

    currentPos = endChar;
  }

  // Remaining plain text. The cursor is a code point index, so it must be
  // compared with the code point count, not the UTF-16 `text.length`.
  if (currentPos < codePoints.length) {
    segments.push({
      text: sliceByCharRange(codePoints, currentPos, codePoints.length),
    });
  }

  // Only reachable for empty text whose facets were all discarded.
  if (segments.length === 0) {
    return [{ text }];
  }

  return segments;
}

/**
 * Builds a byte offset prefix array for UTF-8 encoded text.
 *
 * Entry `i` holds the number of UTF-8 bytes occupied by the first `i`
 * code points, so the array has `codePoints.length + 1` entries and is
 * strictly increasing.
 */
function buildBytePrefix(codePoints: readonly string[]): number[] {
  const prefix: number[] = [0];
  let byteCount = 0;

  for (const char of codePoints) {
    byteCount += utf8ByteLength(char);
    prefix.push(byteCount);
  }

  return prefix;
}

/**
 * Returns the number of bytes a single code point occupies in UTF-8.
 *
 * `char` must be one element produced by iterating a string by code
 * point: either a single UTF-16 code unit or a surrogate pair.
 */
function utf8ByteLength(char: string): number {
  // Two code units means a surrogate pair, i.e. a code point above U+FFFF.
  if (char.length > 1) return 4;

  const unit = char.charCodeAt(0);
  if (unit < 0x80) return 1;
  if (unit < 0x800) return 2;
  // Rest of the BMP. A lone surrogate is encoded as U+FFFD by UTF-8
  // encoders, which is also three bytes.
  return 3;
}

/**
 * Converts a byte offset to a character index using the byte prefix array.
 *
 * Returns the index of the last prefix entry that is less than or equal
 * to `byteOffset`. Offsets that fall inside a multi-byte character
 * therefore snap back to the start of that character, negative offsets
 * map to 0, and offsets past the end map to the last index.
 */
function byteOffsetToCharIndex(prefix: number[], byteOffset: number): number {
  // Binary search for the count of entries <= byteOffset.
  let low = 0;
  let high = prefix.length;

  while (low < high) {
    const mid = (low + high) >>> 1;
    const value = prefix[mid];
    if (value !== undefined && value <= byteOffset) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }

  return Math.max(0, low - 1);
}

/**
 * Joins the code points in the half-open range `[start, end)` back into a
 * string, so surrogate pairs are never split.
 */
function sliceByCharRange(
  codePoints: readonly string[],
  start: number,
  end: number,
): string {
  return codePoints.slice(start, end).join("");
}