lib/utils/richtext.ts at main · nekomimi.pet/atproto-ui

nekomimi.pet / atproto-ui
A React component library for rendering common AT Protocol records for applications such as Bluesky and Leaflet.
atproto-ui / lib / utils / richtext.ts
at main 2.9 kB view raw
  1import type { AppBskyRichtextFacet } from "@atcute/bluesky";
  2
  3export interface TextSegment {
  4	text: string;
  5	facet?: AppBskyRichtextFacet.Main;
  6}
  7
  8/**
  9 * Converts a text string with facets into segments that can be rendered
 10 * with appropriate styling and interactivity.
 11 */
 12export function createTextSegments(
 13	text: string,
 14	facets?: AppBskyRichtextFacet.Main[],
 15): TextSegment[] {
 16	if (!facets || facets.length === 0) {
 17		return [{ text }];
 18	}
 19
 20	// Build byte-to-char index mapping
 21	const bytePrefix = buildBytePrefix(text);
 22
 23	// Sort facets by start position
 24	const sortedFacets = [...facets].sort(
 25		(a, b) => a.index.byteStart - b.index.byteStart,
 26	);
 27
 28	const segments: TextSegment[] = [];
 29	let currentPos = 0;
 30
 31	for (const facet of sortedFacets) {
 32		const startChar = byteOffsetToCharIndex(bytePrefix, facet.index.byteStart);
 33		const endChar = byteOffsetToCharIndex(bytePrefix, facet.index.byteEnd);
 34
 35		// Add plain text before this facet
 36		if (startChar > currentPos) {
 37			segments.push({
 38				text: sliceByCharRange(text, currentPos, startChar),
 39			});
 40		}
 41
 42		// Add the faceted text
 43		segments.push({
 44			text: sliceByCharRange(text, startChar, endChar),
 45			facet,
 46		});
 47
 48		currentPos = endChar;
 49	}
 50
 51	// Add remaining plain text
 52	if (currentPos < text.length) {
 53		segments.push({
 54			text: sliceByCharRange(text, currentPos, text.length),
 55		});
 56	}
 57
 58	return segments;
 59}
 60
 61/**
 62 * Builds a byte offset prefix array for UTF-8 encoded text.
 63 * This handles multi-byte characters correctly.
 64 */
 65function buildBytePrefix(text: string): number[] {
 66	const encoder = new TextEncoder();
 67	const prefix: number[] = [0];
 68	let byteCount = 0;
 69
 70	for (let i = 0; i < text.length; ) {
 71		const codePoint = text.codePointAt(i);
 72		if (codePoint === undefined) break;
 73
 74		const char = String.fromCodePoint(codePoint);
 75		const encoded = encoder.encode(char);
 76		byteCount += encoded.length;
 77		prefix.push(byteCount);
 78
 79		// Handle surrogate pairs (emojis, etc.)
 80		i += codePoint > 0xffff ? 2 : 1;
 81	}
 82
 83	return prefix;
 84}
 85
 86/**
 87 * Converts a byte offset to a character index using the byte prefix array.
 88 */
 89function byteOffsetToCharIndex(prefix: number[], byteOffset: number): number {
 90	for (let i = 0; i < prefix.length; i++) {
 91		if (prefix[i] === byteOffset) return i;
 92		if (prefix[i] > byteOffset) return Math.max(0, i - 1);
 93	}
 94	return prefix.length - 1;
 95}
 96
 97/**
 98 * Slices text by character range, handling multi-byte characters correctly.
 99 */
100function sliceByCharRange(text: string, start: number, end: number): string {
101	if (start <= 0 && end >= text.length) return text;
102
103	let result = "";
104	let charIndex = 0;
105
106	for (let i = 0; i < text.length && charIndex < end; ) {
107		const codePoint = text.codePointAt(i);
108		if (codePoint === undefined) break;
109
110		const char = String.fromCodePoint(codePoint);
111		if (charIndex >= start && charIndex < end) {
112			result += char;
113		}
114
115		i += codePoint > 0xffff ? 2 : 1;
116		charIndex++;
117	}
118
119	return result;
120}