From 6e85dcd3f5585eb0418f01de7d0c7b96f5594ac8 Mon Sep 17 00:00:00 2001 From: uwx Date: Mon, 8 Dec 2025 18:54:24 +0100 Subject: [PATCH 1/2] feat: render full post contents for posts bridged from mastodon or wafrn --- src/components/Post/MastodonHtmlContent.tsx | 235 ++++++++++++ src/lib/strings/html-sanitizer.ts | 356 ++++++++++++++++++ .../components/ThreadItemAnchor.tsx | 60 ++- .../PostThread/components/ThreadItemPost.tsx | 66 +++- src/screens/Settings/DeerSettings.tsx | 27 ++ src/state/persisted/schema.ts | 2 + src/state/preferences/index.tsx | 17 +- .../preferences/render-mastodon-html.tsx | 49 +++ src/view/com/posts/PostFeedItem.tsx | 76 ++-- 9 files changed, 821 insertions(+), 67 deletions(-) create mode 100644 src/components/Post/MastodonHtmlContent.tsx create mode 100644 src/lib/strings/html-sanitizer.ts create mode 100644 src/state/preferences/render-mastodon-html.tsx diff --git a/src/components/Post/MastodonHtmlContent.tsx b/src/components/Post/MastodonHtmlContent.tsx new file mode 100644 index 000000000..97646508f --- /dev/null +++ b/src/components/Post/MastodonHtmlContent.tsx @@ -0,0 +1,235 @@ +import {useMemo} from 'react' +import {type StyleProp, type TextStyle, View, ViewStyle} from 'react-native' +import {type AppBskyFeedPost} from '@atproto/api' + +import {sanitizeHtml} from '#/lib/strings/html-sanitizer' +import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html' +import { atoms } from '#/alf' +import {InlineLinkText} from '#/components/Link' +import {P, Text} from '#/components/Typography' + +interface MastodonHtmlContentProps { + record: AppBskyFeedPost.Record + style?: StyleProp, + textStyle?: StyleProp, + numberOfLines?: number +} + +export function useHasMastodonHtmlContent(record: AppBskyFeedPost.Record) { + const renderMastodonHtml = useRenderMastodonHtml() + + return useMemo(() => { + if (!renderMastodonHtml) return false + + const fullText = (record as any).fullText as string | undefined + const bridgyOriginalText = (record as any).bridgyOriginalText as + | string + | undefined + + return !!(fullText || bridgyOriginalText) + }, [record, renderMastodonHtml]) +} + +export function MastodonHtmlContent({ + record, + style, + textStyle, + numberOfLines, +}: MastodonHtmlContentProps) { + const renderMastodonHtml = useRenderMastodonHtml() + + const htmlContent = useMemo(() => { + if (!renderMastodonHtml) return null + + const fullText = (record as any).fullText as string | undefined + const bridgyOriginalText = (record as any).bridgyOriginalText as + | string + | undefined + + const rawHtml = fullText || bridgyOriginalText + + if (!rawHtml) return null + + return sanitizeHtml(rawHtml) + }, [record, renderMastodonHtml]) + + const renderedContent = useMemo(() => { + if (!htmlContent) return null + + // Parse and render with React components on all platforms + return renderHtmlAsReact(htmlContent, numberOfLines, textStyle) + }, [htmlContent, numberOfLines, textStyle]) + + if (!renderedContent) return null + + return {renderedContent} +} + +function renderHtmlAsReact( + html: string, + _numberOfLines?: number, + inputTextStyle?: StyleProp, +): React.ReactNode { + const parser = new DOMParser() + const doc = parser.parseFromString(html, 'text/html') + + const textStyle: StyleProp = [ + atoms.leading_snug, + atoms.text_md, + inputTextStyle, + ] + + const renderNode = (node: Node, key: number, insideLink = false): React.ReactNode => { + if (node.nodeType === Node.TEXT_NODE) { + // Don't wrap text in styled Text component if inside a link + if (insideLink) { + return node.nodeValue + } + return + {node.nodeValue} + + } + + if (node.nodeType === Node.ELEMENT_NODE) { + const element = node as Element + const children = Array.from(element.childNodes).map((child, i) => + renderNode(child, i, insideLink || element.tagName.toLowerCase() === 'a'), + ) + + switch (element.tagName.toLowerCase()) { + case 'p': + return

{children}

+ case 'blockquote': + return ( + +

{children}

+
+ ) + case 'pre': + return ( + +

{children}

+
+ ) + case 'code': + return ( + + {children} + + ) + case 'strong': + case 'b': + return ( + + {children} + + ) + case 'em': + case 'i': + return ( + + {children} + + ) + case 'u': + return ( + + {children} + + ) + case 'del': + return ( + + {children} + + ) + case 'ul': + return ( + + {children} + + ) + case 'ol': + const start = element.getAttribute('start') + const reversed = element.getAttribute('reversed') !== null + return ( + + {children} + + ) + case 'li': + const value = element.getAttribute('value') + const parentIsOl = element.parentElement?.tagName.toLowerCase() === 'ol' + return ( + + {parentIsOl ? (value || '•') : '•'} + {children} + + ) + case 'a': + const href = element.getAttribute('href') + if (href) { + const linkText = + element.textContent || element.getAttribute('aria-label') || href + const className = element.getAttribute('class') + const isInvisible = className?.includes('invisible') + return ( + + {children} + + ) + } + return {children} + case 'br': + return '\n' + case 'span': + const spanClass = element.getAttribute('class') + // Handle invisible/ellipsis classes for link formatting + if (spanClass?.includes('invisible')) { + return null + } + if (spanClass?.includes('ellipsis')) { + // If inside a link, return plain text, otherwise wrapped + if (insideLink) { + return '…' + } + return … + } + // Handle mentions and hashtags + if (spanClass?.includes('mention') || spanClass?.includes('hashtag')) { + // If inside a link, return children as-is without wrapping + if (insideLink) { + return children + } + return {children} + } + // For spans inside links, return children without wrapping + if (insideLink) { + return children + } + return {children} + case 'div': + return

{children}

+ default: + return {children} + } + } + + return null + } + + const content = Array.from(doc.body.childNodes).map((node, i) => + renderNode(node, i), + ) + + return ( + + {content} + + ) +} diff --git a/src/lib/strings/html-sanitizer.ts b/src/lib/strings/html-sanitizer.ts new file mode 100644 index 000000000..e825faf95 --- /dev/null +++ b/src/lib/strings/html-sanitizer.ts @@ -0,0 +1,356 @@ +/** + * HTML sanitizer inspired by Mastodon's Sanitize::Config + * Sanitizes HTML content to prevent XSS while preserving safe formatting + */ + +const HTTP_PROTOCOLS = ['http', 'https'] + +const LINK_PROTOCOLS = [ + 'http', + 'https', + 'dat', + 'dweb', + 'ipfs', + 'ipns', + 'ssb', + 'gopher', + 'xmpp', + 'magnet', + 'gemini', +] + +const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i + +interface SanitizeOptions { + allowOembed?: boolean +} + +/** + * Sanitizes HTML content following Mastodon's strict rules + */ +export function sanitizeHtml( + html: string, + options: SanitizeOptions = {}, +): string { + if (typeof DOMParser === 'undefined') { + // Fallback for environments without DOMParser + return sanitizeTextOnly(html) + } + + const parser = new DOMParser() + const doc = parser.parseFromString(html, 'text/html') + const body = doc.body + + sanitizeNode(body, options) + + return body.innerHTML +} + +function sanitizeNode(node: Node, options: SanitizeOptions): void { + const childNodes = Array.from(node.childNodes) + + for (const child of childNodes) { + if (child.nodeType === Node.ELEMENT_NODE) { + const element = child as HTMLElement + const tagName = element.tagName.toLowerCase() + + // Define allowed elements + const allowedElements = options.allowOembed + ? [ + 'p', + 'br', + 'span', + 'a', + 'del', + 's', + 'pre', + 'blockquote', + 'code', + 'b', + 'strong', + 'u', + 'i', + 'em', + 'ul', + 'ol', + 'li', + 'ruby', + 'rt', + 'rp', + 'audio', + 'iframe', + 'source', + 'video', + ] + : [ + 'p', + 'br', + 'span', + 'a', + 'del', + 's', + 'pre', + 'blockquote', + 'code', + 'b', + 'strong', + 'u', + 'i', + 'em', + 'ul', + 'ol', + 'li', + 'ruby', + 'rt', + 'rp', + ] + + // Handle unsupported elements (h1-h6) - convert to wrapped in

+ if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { + const strong = element.ownerDocument!.createElement('strong') + while (element.firstChild) { + strong.appendChild(element.firstChild) + } + const p = element.ownerDocument!.createElement('p') + p.appendChild(strong) + element.replaceWith(p) + sanitizeNode(p, options) + continue + } + + // Handle math elements - extract annotation text + if (tagName === 'math') { + const mathText = extractMathAnnotation(element) + if (mathText) { + const textNode = element.ownerDocument!.createTextNode(mathText) + element.replaceWith(textNode) + } else { + element.remove() + } + continue + } + + if (tagName === 'li') { + // Keep li elements but sanitize their children + sanitizeNode(element, options) + continue + } + + // Remove elements not in allowlist + if (!allowedElements.includes(tagName)) { + // Replace with text content + const textNode = element.ownerDocument!.createTextNode( + element.textContent || '', + ) + element.replaceWith(textNode) + continue + } + + // Sanitize attributes + sanitizeAttributes(element, options) + + // Recursively sanitize children + sanitizeNode(element, options) + } + } +} + +function sanitizeAttributes( + element: HTMLElement, + options: SanitizeOptions, +): void { + const tagName = element.tagName.toLowerCase() + const allowedAttrs: Record = { + a: ['href', 'rel', 'class', 'translate'], + span: ['class', 'translate'], + ol: ['start', 'reversed'], + li: ['value'], + p: ['class'], + } + + if (options.allowOembed) { + allowedAttrs.audio = ['controls'] + allowedAttrs.iframe = [ + 'allowfullscreen', + 'frameborder', + 'height', + 'scrolling', + 'src', + 'width', + ] + allowedAttrs.source = ['src', 'type'] + allowedAttrs.video = ['controls', 'height', 'loop', 'width'] + } + + const allowed = allowedAttrs[tagName] || [] + const attrs = Array.from(element.attributes) + + // Remove non-allowed attributes + for (const attr of attrs) { + const attrName = attr.name.toLowerCase() + const isAllowed = allowed.some(a => { + if (a.endsWith('*')) { + return attrName.startsWith(a.slice(0, -1)) + } + return a === attrName + }) + + if (!isAllowed) { + element.removeAttribute(attr.name) + } + } + + // Process specific attributes + if (tagName === 'a') { + processAnchorElement(element) + } + + // Process class whitelist + if (element.hasAttribute('class')) { + processClassWhitelist(element) + } + + // Process translate attribute - remove unless it's "no" + if (element.hasAttribute('translate')) { + const translate = element.getAttribute('translate') + if (translate !== 'no') { + element.removeAttribute('translate') + } + } + + // Validate protocols for elements with src/href + if (element.hasAttribute('href') || element.hasAttribute('src')) { + validateProtocols(element, options) + } +} + +function processAnchorElement(element: HTMLElement): void { + // Add required attributes + element.setAttribute('rel', 'nofollow noopener') + element.setAttribute('target', '_blank') + + // Check if href has unsupported protocol + const href = element.getAttribute('href') + if (href) { + const scheme = getScheme(href) + if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) { + // Replace element with its text content + const textNode = element.ownerDocument!.createTextNode( + element.textContent || '', + ) + element.replaceWith(textNode) + } + } +} + +function processClassWhitelist(element: HTMLElement): void { + const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean) + const whitelisted = classList.filter(className => { + // microformats classes + if (/^[hpuedt]-/.test(className)) return true + // semantic classes + if (/^(mention|hashtag)$/.test(className)) return true + // link formatting classes + if (/^(ellipsis|invisible)$/.test(className)) return true + // quote inline class + if (className === 'quote-inline') return true + return false + }) + + if (whitelisted.length > 0) { + element.className = whitelisted.join(' ') + } else { + element.removeAttribute('class') + } +} + +function validateProtocols( + element: HTMLElement, + options: SanitizeOptions, +): void { + const tagName = element.tagName.toLowerCase() + const src = element.getAttribute('src') + const href = element.getAttribute('href') + const url = src || href + + if (!url) return + + const scheme = getScheme(url) + + // For oembed elements, only allow HTTP protocols for src + if ( + options.allowOembed && + src && + ['iframe', 'source'].includes(tagName) + ) { + if (scheme !== null && !HTTP_PROTOCOLS.includes(scheme)) { + element.removeAttribute('src') + } + // Add sandbox attribute to iframes + if (tagName === 'iframe') { + element.setAttribute( + 'sandbox', + 'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms', + ) + } + } +} + +function getScheme(url: string): string | null { + const match = url.match(PROTOCOL_REGEX) + if (match) { + return match[1].toLowerCase() + } + // Check if it's a relative URL + if (url.startsWith('/') || url.startsWith('.')) { + return 'relative' + } + return null +} + +/** + * Extract math annotation from MathML element + * Follows FEP-dc88 spec for math element representation + */ +function extractMathAnnotation(mathElement: HTMLElement): string | null { + const semantics = Array.from(mathElement.children).find( + child => child.tagName.toLowerCase() === 'semantics', + ) as HTMLElement | undefined + + if (!semantics) return null + + // Look for LaTeX annotation (application/x-tex) + const latexAnnotation = Array.from(semantics.children).find(child => { + return ( + child.tagName.toLowerCase() === 'annotation' && + child.getAttribute('encoding') === 'application/x-tex' + ) + }) + + if (latexAnnotation) { + const display = mathElement.getAttribute('display') + const text = latexAnnotation.textContent || '' + return display === 'block' ? `$$${text}$$` : `$${text}$` + } + + // Look for plain text annotation + const plainAnnotation = Array.from(semantics.children).find(child => { + return ( + child.tagName.toLowerCase() === 'annotation' && + child.getAttribute('encoding') === 'text/plain' + ) + }) + + if (plainAnnotation) { + return plainAnnotation.textContent || null + } + + return null +} + +/** + * Fallback sanitizer that strips all HTML tags + */ +function sanitizeTextOnly(html: string): string { + return html.replace(/<[^>]*>/g, '') +} diff --git a/src/screens/PostThread/components/ThreadItemAnchor.tsx b/src/screens/PostThread/components/ThreadItemAnchor.tsx index 147eeb472..38d61de40 100644 --- a/src/screens/PostThread/components/ThreadItemAnchor.tsx +++ b/src/screens/PostThread/components/ThreadItemAnchor.tsx @@ -56,6 +56,7 @@ import {LabelsOnMyPost} from '#/components/moderation/LabelsOnMe' import {PostAlerts} from '#/components/moderation/PostAlerts' import {type AppModerationCause} from '#/components/Pills' import {Embed, PostEmbedViewContext} from '#/components/Post/Embed' +import {MastodonHtmlContent, useHasMastodonHtmlContent} from '#/components/Post/MastodonHtmlContent' import {PostControls, PostControlsSkeleton} from '#/components/PostControls' import {useFormatPostStatCount} from '#/components/PostControls/util' import {ProfileHoverCard} from '#/components/ProfileHoverCard' @@ -193,6 +194,7 @@ const ThreadItemAnchorInner = memo(function ThreadItemAnchorInner({ const moderation = item.moderation const authorShadow = useProfileShadow(post.author) const {isActive: live} = useActorStatus(post.author) + const hasMastodonHtml = useHasMastodonHtmlContent(record) const richText = useMemo( () => new RichTextAPI({ @@ -398,25 +400,47 @@ const ThreadItemAnchorInner = memo(function ThreadItemAnchorInner({ style={[a.pb_sm]} additionalCauses={additionalPostAlerts} /> - {richText?.text ? ( - - ) : undefined} - {post.embed && ( - - + - + {post.embed && ( + + + + )} + + ) : ( + <> + {richText?.text ? ( + + ) : undefined} + {post.embed && ( + + + + )} + )} new RichTextAPI({ @@ -301,32 +306,53 @@ const ThreadItemPostInner = memo(function ThreadItemPostInner({ style={[a.pb_2xs]} additionalCauses={additionalPostAlerts} /> - {richText?.text ? ( + {hasMastodonHtml ? ( <> - - {limitLines && ( - + {post.embed && ( + + + + )} + + ) : ( + <> + {richText?.text ? ( + <> + + {limitLines && ( + + )} + + ) : undefined} + {post.embed && ( + + + )} - ) : undefined} - {post.embed && ( - - - )} + setRenderMastodonHtml(value)} + style={[a.w_full]}> + + Render Mastodon HTML from bridged posts + + + + + + When enabled, posts bridged from Mastodon will display their + original HTML formatting instead of the plain text version. + + + ) { - - - - {children} - - - + + + + + {children} + + + + diff --git a/src/state/preferences/render-mastodon-html.tsx b/src/state/preferences/render-mastodon-html.tsx new file mode 100644 index 000000000..baf4480da --- /dev/null +++ b/src/state/preferences/render-mastodon-html.tsx @@ -0,0 +1,49 @@ +import React from 'react' + +import * as persisted from '#/state/persisted' + +type StateContext = persisted.Schema['renderMastodonHtml'] +type SetContext = (v: persisted.Schema['renderMastodonHtml']) => void + +const stateContext = React.createContext( + persisted.defaults.renderMastodonHtml, +) +const setContext = React.createContext( + (_: persisted.Schema['renderMastodonHtml']) => {}, +) + +export function Provider({children}: React.PropsWithChildren<{}>) { + const [state, setState] = React.useState(persisted.get('renderMastodonHtml')) + + const setStateWrapped = React.useCallback( + (renderMastodonHtml: persisted.Schema['renderMastodonHtml']) => { + setState(renderMastodonHtml) + persisted.write('renderMastodonHtml', renderMastodonHtml) + }, + [setState], + ) + + React.useEffect(() => { + return persisted.onUpdate('renderMastodonHtml', nextValue => { + setState(nextValue) + }) + }, [setStateWrapped]) + + return ( + + + {children} + + + ) +} + +export function useRenderMastodonHtml() { + return ( + React.useContext(stateContext) ?? persisted.defaults.renderMastodonHtml + ) +} + +export function useSetRenderMastodonHtml() { + return React.useContext(setContext) +} diff --git a/src/view/com/posts/PostFeedItem.tsx b/src/view/com/posts/PostFeedItem.tsx index 67b46aed0..31a2e7a18 100644 --- a/src/view/com/posts/PostFeedItem.tsx +++ b/src/view/com/posts/PostFeedItem.tsx @@ -44,6 +44,10 @@ import {PostAlerts} from '#/components/moderation/PostAlerts' import {type AppModerationCause} from '#/components/Pills' import {Embed} from '#/components/Post/Embed' import {PostEmbedViewContext} from '#/components/Post/Embed/types' +import { + MastodonHtmlContent, + useHasMastodonHtmlContent, +} from '#/components/Post/MastodonHtmlContent' import {PostRepliedTo} from '#/components/Post/PostRepliedTo' import {ShowMoreTextButton} from '#/components/Post/ShowMoreTextButton' import {PostControls} from '#/components/PostControls' @@ -418,6 +422,9 @@ let PostContent = ({ threadgateRecord?: AppBskyFeedThreadgate.Record }): React.ReactNode => { const {currentAccount} = useSession() + const hasMastodonHtml = useHasMastodonHtmlContent( + post.record as AppBskyFeedPost.Record, + ) const [limitLines, setLimitLines] = useState( () => countLines(richText.text) >= MAX_POST_LINES, ) @@ -460,32 +467,57 @@ let PostContent = ({ style={[a.pb_xs]} additionalCauses={additionalPostAlerts} /> - {richText.text ? ( + {hasMastodonHtml ? ( <> - - {limitLines && ( - - )} + {postEmbed ? ( + + + + ) : null} - ) : undefined} - {postEmbed ? ( - - - - ) : null} + ) : ( + <> + {richText.text ? ( + <> + + {limitLines && ( + + )} + + ) : undefined} + {postEmbed ? ( + + + + ) : null} + + )} ) } -- 2.46.2.windows.1 From e7e78fad24b3e1c4d5139c12fc01b19db540ccee Mon Sep 17 00:00:00 2001 From: uwx Date: Mon, 8 Dec 2025 19:07:15 +0100 Subject: [PATCH 2/2] fix: don't duplicate work in MastodonHtmlContent --- src/components/Post/MastodonHtmlContent.tsx | 262 ++++++++++++-- src/lib/strings/html-sanitizer.ts | 356 -------------------- 2 files changed, 231 insertions(+), 387 deletions(-) delete mode 100644 src/lib/strings/html-sanitizer.ts diff --git a/src/components/Post/MastodonHtmlContent.tsx b/src/components/Post/MastodonHtmlContent.tsx index 97646508f..80dabfe0e 100644 --- a/src/components/Post/MastodonHtmlContent.tsx +++ b/src/components/Post/MastodonHtmlContent.tsx @@ -1,8 +1,7 @@ import {useMemo} from 'react' -import {type StyleProp, type TextStyle, View, ViewStyle} from 'react-native' +import {type StyleProp, type TextStyle, View, type ViewStyle} from 'react-native' import {type AppBskyFeedPost} from '@atproto/api' -import {sanitizeHtml} from '#/lib/strings/html-sanitizer' import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html' import { atoms } from '#/alf' import {InlineLinkText} from '#/components/Link' @@ -38,7 +37,7 @@ export function MastodonHtmlContent({ }: MastodonHtmlContentProps) { const renderMastodonHtml = useRenderMastodonHtml() - const htmlContent = useMemo(() => { + const renderedContent = useMemo(() => { if (!renderMastodonHtml) return null const fullText = (record as any).fullText as string | undefined @@ -50,26 +49,64 @@ export function MastodonHtmlContent({ if (!rawHtml) return null - return sanitizeHtml(rawHtml) - }, [record, renderMastodonHtml]) - - const renderedContent = useMemo(() => { - if (!htmlContent) return null - - // Parse and render with React components on all platforms - return renderHtmlAsReact(htmlContent, numberOfLines, textStyle) - }, [htmlContent, numberOfLines, textStyle]) + // Parse HTML once and sanitize/render in a single pass + return sanitizeAndRenderHtml(rawHtml, numberOfLines, textStyle) + }, [record, renderMastodonHtml, numberOfLines, textStyle]) if (!renderedContent) return null return {renderedContent} } -function renderHtmlAsReact( +const LINK_PROTOCOLS = [ + 'http', + 'https', + 'dat', + 'dweb', + 'ipfs', + 'ipns', + 'ssb', + 'gopher', + 'xmpp', + 'magnet', + 'gemini', +] + +const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i + +const ALLOWED_ELEMENTS = [ + 'p', + 'br', + 'span', + 'a', + 'del', + 's', + 'pre', + 'blockquote', + 'code', + 'b', + 'strong', + 'u', + 'i', + 'em', + 'ul', + 'ol', + 'li', + 'ruby', + 'rt', + 'rp', +] + +function sanitizeAndRenderHtml( html: string, _numberOfLines?: number, inputTextStyle?: StyleProp, ): React.ReactNode { + if (typeof DOMParser === 'undefined') { + // Fallback for environments without DOMParser + return html.replace(/<[^>]*>/g, '') + } + const parser = new DOMParser() const doc = parser.parseFromString(html, 'text/html') @@ -79,6 +116,7 @@ function renderHtmlAsReact( inputTextStyle, ] + // Sanitize and render in a single pass const renderNode = (node: Node, key: number, insideLink = false): React.ReactNode => { if (node.nodeType === Node.TEXT_NODE) { // Don't wrap text in styled Text component if inside a link @@ -92,11 +130,44 @@ function renderHtmlAsReact( if (node.nodeType === Node.ELEMENT_NODE) { const element = node as Element + const tagName = element.tagName.toLowerCase() + + // Handle unsupported elements (h1-h6) - convert to wrapped in

+ if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { + const children = Array.from(element.childNodes).map((child, i) => + renderNode(child, i, insideLink), + ) + return ( +

+ {children} +

+ ) + } + + // Handle math elements - extract annotation text + if (tagName === 'math') { + const mathText = extractMathAnnotation(element) + if (mathText) { + return {mathText} + } + return null + } + + // Remove elements not in allowlist - replace with text content + if (!ALLOWED_ELEMENTS.includes(tagName)) { + return element.textContent ? ( + {element.textContent} + ) : null + } + + // Sanitize and process element + sanitizeElementAttributes(element) + const children = Array.from(element.childNodes).map((child, i) => - renderNode(child, i, insideLink || element.tagName.toLowerCase() === 'a'), + renderNode(child, i, insideLink || tagName === 'a'), ) - switch (element.tagName.toLowerCase()) { + switch (tagName) { case 'p': return

{children}

case 'blockquote': @@ -108,38 +179,39 @@ function renderHtmlAsReact( case 'pre': return ( -

{children}

+

{children}

) case 'code': return ( - + {children} ) case 'strong': case 'b': return ( - + {children} ) case 'em': case 'i': return ( - + {children} ) case 'u': return ( - + {children} ) case 'del': + case 's': return ( - + {children} ) @@ -150,22 +222,24 @@ function renderHtmlAsReact( ) case 'ol': - const start = element.getAttribute('start') - const reversed = element.getAttribute('reversed') !== null return ( - + {children} ) case 'li': - const value = element.getAttribute('value') const parentIsOl = element.parentElement?.tagName.toLowerCase() === 'ol' return ( - {parentIsOl ? (value || 'ΓÇó') : 'ΓÇó'} - {children} + {parentIsOl ? 'ΓÇó' : 'ΓÇó'} + {children} ) + case 'ruby': + return {children} + case 'rt': + case 'rp': + return null // TODO support ruby text rendering case 'a': const href = element.getAttribute('href') if (href) { @@ -179,7 +253,7 @@ function renderHtmlAsReact( to={href} label={linkText} shouldProxy - style={isInvisible ? {width: 0, height: 0, position: 'absolute'} : textStyle}> + style={isInvisible ? {display: 'none'} : textStyle}> {children} ) @@ -191,7 +265,7 @@ function renderHtmlAsReact( const spanClass = element.getAttribute('class') // Handle invisible/ellipsis classes for link formatting if (spanClass?.includes('invisible')) { - return null + return {children} } if (spanClass?.includes('ellipsis')) { // If inside a link, return plain text, otherwise wrapped @@ -213,8 +287,6 @@ function renderHtmlAsReact( return children } return {children} - case 'div': - return

{children}

default: return {children} } @@ -233,3 +305,131 @@ function renderHtmlAsReact(
) } + +function sanitizeElementAttributes(element: Element): void { + const tagName = element.tagName.toLowerCase() + const allowedAttrs: Record = { + a: ['href', 'rel', 'class', 'translate'], + span: ['class', 'translate'], + ol: ['start', 'reversed'], + li: ['value'], + p: ['class'], + } + + const allowed = allowedAttrs[tagName] || [] + const attrs = Array.from(element.attributes) + + // Remove non-allowed attributes + for (const attr of attrs) { + const attrName = attr.name.toLowerCase() + const isAllowed = allowed.some(a => { + if (a.endsWith('*')) { + return attrName.startsWith(a.slice(0, -1)) + } + return a === attrName + }) + + if (!isAllowed) { + element.removeAttribute(attr.name) + } + } + + // Process specific attributes + if (tagName === 'a') { + processAnchorElement(element) + } + + // Process class whitelist + if (element.hasAttribute('class')) { + processClassWhitelist(element) + } + + // Process translate attribute - remove unless it's "no" + if (element.hasAttribute('translate')) { + const translate = element.getAttribute('translate') + if (translate !== 'no') { + element.removeAttribute('translate') + } + } +} + +function processAnchorElement(element: Element): void { + // Check if href has unsupported protocol + const href = element.getAttribute('href') + if (href) { + const scheme = getScheme(href) + if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) { + // Remove the href to disable the link + element.removeAttribute('href') + } + } +} + +function processClassWhitelist(element: Element): void { + const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean) + const whitelisted = classList.filter(className => { + // microformats classes + if (/^[hpuedt]-/.test(className)) return true + // semantic classes + if (/^(mention|hashtag)$/.test(className)) return true + // link formatting classes + if (/^(ellipsis|invisible)$/.test(className)) return true + // quote inline class + if (className === 'quote-inline') return true + return false + }) + + if (whitelisted.length > 0) { + element.className = whitelisted.join(' ') + } else { + element.removeAttribute('class') + } +} + +function getScheme(url: string): string | null { + const match = url.match(PROTOCOL_REGEX) + if (match) { + return match[1].toLowerCase() + } + // Check if it's a relative URL + if (url.startsWith('/') || url.startsWith('.')) { + return 'relative' + } + return null +} + +function extractMathAnnotation(mathElement: Element): string | null { + const semantics = Array.from(mathElement.children).find( + child => child.tagName.toLowerCase() === 'semantics', + ) as Element | undefined + + if (!semantics) return null + + // Look for LaTeX annotation (application/x-tex) + const latexAnnotation = Array.from(semantics.children).find(child => { + return ( + child.tagName.toLowerCase() === 'annotation' && + child.getAttribute('encoding') === 'application/x-tex' + ) + }) + + if (latexAnnotation) { + const display = mathElement.getAttribute('display') + const text = latexAnnotation.textContent || '' + return display === 'block' ? `$$${text}$$` : `$${text}$` + } + + // Look for plain text annotation + const plainAnnotation = Array.from(semantics.children).find(child => { + return ( + child.tagName.toLowerCase() === 'annotation' && + child.getAttribute('encoding') === 'text/plain' + ) + }) + + if (plainAnnotation) { + return plainAnnotation.textContent || null + } + + return null +} diff --git a/src/lib/strings/html-sanitizer.ts b/src/lib/strings/html-sanitizer.ts deleted file mode 100644 index e825faf95..000000000 --- a/src/lib/strings/html-sanitizer.ts +++ /dev/null @@ -1,356 +0,0 @@ -/** - * HTML sanitizer inspired by Mastodon's Sanitize::Config - * Sanitizes HTML content to prevent XSS while preserving safe formatting - */ - -const HTTP_PROTOCOLS = ['http', 'https'] - -const LINK_PROTOCOLS = [ - 'http', - 'https', - 'dat', - 'dweb', - 'ipfs', - 'ipns', - 'ssb', - 'gopher', - 'xmpp', - 'magnet', - 'gemini', -] - -const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i - -interface SanitizeOptions { - allowOembed?: boolean -} - -/** - * Sanitizes HTML content following Mastodon's strict rules - */ -export function sanitizeHtml( - html: string, - options: SanitizeOptions = {}, -): string { - if (typeof DOMParser === 'undefined') { - // Fallback for environments without DOMParser - return sanitizeTextOnly(html) - } - - const parser = new DOMParser() - const doc = parser.parseFromString(html, 'text/html') - const body = doc.body - - sanitizeNode(body, options) - - return body.innerHTML -} - -function sanitizeNode(node: Node, options: SanitizeOptions): void { - const childNodes = Array.from(node.childNodes) - - for (const child of childNodes) { - if (child.nodeType === Node.ELEMENT_NODE) { - const element = child as HTMLElement - const tagName = element.tagName.toLowerCase() - - // Define allowed elements - const allowedElements = options.allowOembed - ? [ - 'p', - 'br', - 'span', - 'a', - 'del', - 's', - 'pre', - 'blockquote', - 'code', - 'b', - 'strong', - 'u', - 'i', - 'em', - 'ul', - 'ol', - 'li', - 'ruby', - 'rt', - 'rp', - 'audio', - 'iframe', - 'source', - 'video', - ] - : [ - 'p', - 'br', - 'span', - 'a', - 'del', - 's', - 'pre', - 'blockquote', - 'code', - 'b', - 'strong', - 'u', - 'i', - 'em', - 'ul', - 'ol', - 'li', - 'ruby', - 'rt', - 'rp', - ] - - // Handle unsupported elements (h1-h6) - convert to wrapped in

- if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { - const strong = element.ownerDocument!.createElement('strong') - while (element.firstChild) { - strong.appendChild(element.firstChild) - } - const p = element.ownerDocument!.createElement('p') - p.appendChild(strong) - element.replaceWith(p) - sanitizeNode(p, options) - continue - } - - // Handle math elements - extract annotation text - if (tagName === 'math') { - const mathText = extractMathAnnotation(element) - if (mathText) { - const textNode = element.ownerDocument!.createTextNode(mathText) - element.replaceWith(textNode) - } else { - element.remove() - } - continue - } - - if (tagName === 'li') { - // Keep li elements but sanitize their children - sanitizeNode(element, options) - continue - } - - // Remove elements not in allowlist - if (!allowedElements.includes(tagName)) { - // Replace with text content - const textNode = element.ownerDocument!.createTextNode( - element.textContent || '', - ) - element.replaceWith(textNode) - continue - } - - // Sanitize attributes - sanitizeAttributes(element, options) - - // Recursively sanitize children - sanitizeNode(element, options) - } - } -} - -function sanitizeAttributes( - element: HTMLElement, - options: SanitizeOptions, -): void { - const tagName = element.tagName.toLowerCase() - const allowedAttrs: Record = { - a: ['href', 'rel', 'class', 'translate'], - span: ['class', 'translate'], - ol: ['start', 'reversed'], - li: ['value'], - p: ['class'], - } - - if (options.allowOembed) { - allowedAttrs.audio = ['controls'] - allowedAttrs.iframe = [ - 'allowfullscreen', - 'frameborder', - 'height', - 'scrolling', - 'src', - 'width', - ] - allowedAttrs.source = ['src', 'type'] - allowedAttrs.video = ['controls', 'height', 'loop', 'width'] - } - - const allowed = allowedAttrs[tagName] || [] - const attrs = Array.from(element.attributes) - - // Remove non-allowed attributes - for (const attr of attrs) { - const attrName = attr.name.toLowerCase() - const isAllowed = allowed.some(a => { - if (a.endsWith('*')) { - return attrName.startsWith(a.slice(0, -1)) - } - return a === attrName - }) - - if (!isAllowed) { - element.removeAttribute(attr.name) - } - } - - // Process specific attributes - if (tagName === 'a') { - processAnchorElement(element) - } - - // Process class whitelist - if (element.hasAttribute('class')) { - processClassWhitelist(element) - } - - // Process translate attribute - remove unless it's "no" - if (element.hasAttribute('translate')) { - const translate = element.getAttribute('translate') - if (translate !== 'no') { - element.removeAttribute('translate') - } - } - - // Validate protocols for elements with src/href - if (element.hasAttribute('href') || element.hasAttribute('src')) { - validateProtocols(element, options) - } -} - -function processAnchorElement(element: HTMLElement): void { - // Add required attributes - element.setAttribute('rel', 'nofollow noopener') - element.setAttribute('target', '_blank') - - // Check if href has unsupported protocol - const href = element.getAttribute('href') - if (href) { - const scheme = getScheme(href) - if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) { - // Replace element with its text content - const textNode = element.ownerDocument!.createTextNode( - element.textContent || '', - ) - element.replaceWith(textNode) - } - } -} - -function processClassWhitelist(element: HTMLElement): void { - const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean) - const whitelisted = classList.filter(className => { - // microformats classes - if (/^[hpuedt]-/.test(className)) return true - // semantic classes - if (/^(mention|hashtag)$/.test(className)) return true - // link formatting classes - if (/^(ellipsis|invisible)$/.test(className)) return true - // quote inline class - if (className === 'quote-inline') return true - return false - }) - - if (whitelisted.length > 0) { - element.className = whitelisted.join(' ') - } else { - element.removeAttribute('class') - } -} - -function validateProtocols( - element: HTMLElement, - options: SanitizeOptions, -): void { - const tagName = element.tagName.toLowerCase() - const src = element.getAttribute('src') - const href = element.getAttribute('href') - const url = src || href - - if (!url) return - - const scheme = getScheme(url) - - // For oembed elements, only allow HTTP protocols for src - if ( - options.allowOembed && - src && - ['iframe', 'source'].includes(tagName) - ) { - if (scheme !== null && !HTTP_PROTOCOLS.includes(scheme)) { - element.removeAttribute('src') - } - // Add sandbox attribute to iframes - if (tagName === 'iframe') { - element.setAttribute( - 'sandbox', - 'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms', - ) - } - } -} - -function getScheme(url: string): string | null { - const match = url.match(PROTOCOL_REGEX) - if (match) { - return match[1].toLowerCase() - } - // Check if it's a relative URL - if (url.startsWith('/') || url.startsWith('.')) { - return 'relative' - } - return null -} - -/** - * Extract math annotation from MathML element - * Follows FEP-dc88 spec for math element representation - */ -function extractMathAnnotation(mathElement: HTMLElement): string | null { - const semantics = Array.from(mathElement.children).find( - child => child.tagName.toLowerCase() === 'semantics', - ) as HTMLElement | undefined - - if (!semantics) return null - - // Look for LaTeX annotation (application/x-tex) - const latexAnnotation = Array.from(semantics.children).find(child => { - return ( - child.tagName.toLowerCase() === 'annotation' && - child.getAttribute('encoding') === 'application/x-tex' - ) - }) - - if (latexAnnotation) { - const display = mathElement.getAttribute('display') - const text = latexAnnotation.textContent || '' - return display === 'block' ? `$$${text}$$` : `$${text}$` - } - - // Look for plain text annotation - const plainAnnotation = Array.from(semantics.children).find(child => { - return ( - child.tagName.toLowerCase() === 'annotation' && - child.getAttribute('encoding') === 'text/plain' - ) - }) - - if (plainAnnotation) { - return plainAnnotation.textContent || null - } - - return null -} - -/** - * Fallback sanitizer that strips all HTML tags - */ -function sanitizeTextOnly(html: string): string { - return html.replace(/<[^>]*>/g, '') -} -- 2.46.2.windows.1 From 3e5262ab91f66c0233b53dd4833b13a3e2e84410 Mon Sep 17 00:00:00 2001 From: uwx Date: Mon, 8 Dec 2025 20:17:20 +0100 Subject: [PATCH 1/4] chore: remove any casts --- src/components/Post/MastodonHtmlContent.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/Post/MastodonHtmlContent.tsx b/src/components/Post/MastodonHtmlContent.tsx index 80dabfe0e..142e38cf5 100644 --- a/src/components/Post/MastodonHtmlContent.tsx +++ b/src/components/Post/MastodonHtmlContent.tsx @@ -20,8 +20,8 @@ export function useHasMastodonHtmlContent(record: AppBskyFeedPost.Record) { return useMemo(() => { if (!renderMastodonHtml) return false - const fullText = (record as any).fullText as string | undefined - const bridgyOriginalText = (record as any).bridgyOriginalText as + const fullText = record.fullText as string | undefined + const bridgyOriginalText = record.bridgyOriginalText as | string | undefined @@ -40,8 +40,8 @@ export function MastodonHtmlContent({ const renderedContent = useMemo(() => { if (!renderMastodonHtml) return null - const fullText = (record as any).fullText as string | undefined - const bridgyOriginalText = (record as any).bridgyOriginalText as + const fullText = record.fullText as string | undefined + const bridgyOriginalText = record.bridgyOriginalText as | string | undefined -- 2.46.2.windows.1 From 265f3ab40bc28f1d30e93335fe6961bfdaf2719a Mon Sep 17 00:00:00 2001 From: uwx Date: Mon, 8 Dec 2025 20:23:58 +0100 Subject: [PATCH 2/4] chore: replace unicode ellipsis with escaped version --- src/components/Post/MastodonHtmlContent.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/Post/MastodonHtmlContent.tsx b/src/components/Post/MastodonHtmlContent.tsx index 142e38cf5..c582a8143 100644 --- a/src/components/Post/MastodonHtmlContent.tsx +++ b/src/components/Post/MastodonHtmlContent.tsx @@ -270,9 +270,9 @@ function sanitizeAndRenderHtml( if (spanClass?.includes('ellipsis')) { // If inside a link, return plain text, otherwise wrapped if (insideLink) { - return '…' + return '\u2026' } - return … + return {'\u2026'} } // Handle mentions and hashtags if (spanClass?.includes('mention') || spanClass?.includes('hashtag')) { -- 2.46.2.windows.1 From eff00beb1864bbdbda92dbc409d3112fdd58c1fb Mon Sep 17 00:00:00 2001 From: uwx Date: Mon, 8 Dec 2025 20:26:34 +0100 Subject: [PATCH 3/4] feat/MastodonHtml: render

    as ordered lists (with numeric prefixes) --- src/components/Post/MastodonHtmlContent.tsx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/components/Post/MastodonHtmlContent.tsx b/src/components/Post/MastodonHtmlContent.tsx index c582a8143..c470c59fc 100644 --- a/src/components/Post/MastodonHtmlContent.tsx +++ b/src/components/Post/MastodonHtmlContent.tsx @@ -117,7 +117,7 @@ function sanitizeAndRenderHtml( ] // Sanitize and render in a single pass - const renderNode = (node: Node, key: number, insideLink = false): React.ReactNode => { + const renderNode = (node: Node, key: string, insideLink = false, listItemIndex?: number): React.ReactNode => { if (node.nodeType === Node.TEXT_NODE) { // Don't wrap text in styled Text component if inside a link if (insideLink) { @@ -135,7 +135,7 @@ function sanitizeAndRenderHtml( // Handle unsupported elements (h1-h6) - convert to wrapped in

    if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { const children = Array.from(element.childNodes).map((child, i) => - renderNode(child, i, insideLink), + renderNode(child, String(i), insideLink), ) return (

    @@ -164,7 +164,7 @@ function sanitizeAndRenderHtml( sanitizeElementAttributes(element) const children = Array.from(element.childNodes).map((child, i) => - renderNode(child, i, insideLink || tagName === 'a'), + renderNode(child, String(i), insideLink || tagName === 'a'), ) switch (tagName) { @@ -222,16 +222,20 @@ function sanitizeAndRenderHtml( ) case 'ol': + const start = element.getAttribute('start') + const startNum = start ? parseInt(start, 10) : 1 return ( - {children} + {Array.from(element.childNodes) + .filter(child => child.nodeType === Node.ELEMENT_NODE && (child as Element).tagName.toLowerCase() === 'li') + .map((child, i) => renderNode(child, `${key}-${i}`, insideLink, startNum + i))} ) case 'li': - const parentIsOl = element.parentElement?.tagName.toLowerCase() === 'ol' + const marker = listItemIndex !== undefined ? `${listItemIndex}.` : '\u2022' return ( - {parentIsOl ? 'ΓÇó' : 'ΓÇó'} + {marker} {children} ) -- 2.46.2.windows.1 From a28c6d3f8b3ebd8b039549450775301ad5996b84 Mon Sep 17 00:00:00 2001 From: uwx Date: Mon, 8 Dec 2025 20:32:58 +0100 Subject: [PATCH 4/4] feat/MastodonHtml: collapse posts taller than 150px --- src/components/Post/MastodonHtmlContent.tsx | 67 +++++++++++++++++---- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/src/components/Post/MastodonHtmlContent.tsx b/src/components/Post/MastodonHtmlContent.tsx index c470c59fc..eec379b5e 100644 --- a/src/components/Post/MastodonHtmlContent.tsx +++ b/src/components/Post/MastodonHtmlContent.tsx @@ -1,9 +1,18 @@ -import {useMemo} from 'react' -import {type StyleProp, type TextStyle, View, type ViewStyle} from 'react-native' +import {useMemo, useState} from 'react' +import { + type LayoutChangeEvent, + type StyleProp, + type TextStyle, + View, + type ViewStyle, +} from 'react-native' import {type AppBskyFeedPost} from '@atproto/api' +import {msg, Trans} from '@lingui/macro' +import {useLingui} from '@lingui/react' import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html' -import { atoms } from '#/alf' +import {atoms as a} from '#/alf' +import {Button, ButtonText} from '#/components/Button' import {InlineLinkText} from '#/components/Link' import {P, Text} from '#/components/Typography' @@ -36,6 +45,10 @@ export function MastodonHtmlContent({ numberOfLines, }: MastodonHtmlContentProps) { const renderMastodonHtml = useRenderMastodonHtml() + const {_} = useLingui() + const [isExpanded, setIsExpanded] = useState(false) + const [contentHeight, setContentHeight] = useState(null) + const [isTall, setIsTall] = useState(false) const renderedContent = useMemo(() => { if (!renderMastodonHtml) return null @@ -53,9 +66,41 @@ export function MastodonHtmlContent({ return sanitizeAndRenderHtml(rawHtml, numberOfLines, textStyle) }, [record, renderMastodonHtml, numberOfLines, textStyle]) + const handleLayout = (event: LayoutChangeEvent) => { + const height = event.nativeEvent.layout.height + if (contentHeight === null) { + setContentHeight(height) + // Consider content "tall" if it's taller than 150px + setIsTall(height > 150) + } + } + if (!renderedContent) return null - return {renderedContent} + const shouldCollapse = isTall && !isExpanded + + return ( + + + {renderedContent} + + {shouldCollapse && ( + + )} + + ) } const LINK_PROTOCOLS = [ @@ -111,8 +156,8 @@ function sanitizeAndRenderHtml( const doc = parser.parseFromString(html, 'text/html') const textStyle: StyleProp = [ - atoms.leading_snug, - atoms.text_md, + a.leading_snug, + a.text_md, inputTextStyle, ] @@ -300,7 +345,7 @@ function sanitizeAndRenderHtml( } const content = Array.from(doc.body.childNodes).map((node, i) => - renderNode(node, i), + renderNode(node, String(i)), ) return ( @@ -326,11 +371,11 @@ function sanitizeElementAttributes(element: Element): void { // Remove non-allowed attributes for (const attr of attrs) { const attrName = attr.name.toLowerCase() - const isAllowed = allowed.some(a => { - if (a.endsWith('*')) { - return attrName.startsWith(a.slice(0, -1)) + const isAllowed = allowed.some(allowedAttr => { + if (allowedAttr.endsWith('*')) { + return attrName.startsWith(allowedAttr.slice(0, -1)) } - return a === attrName + return allowedAttr === attrName }) if (!isAllowed) { -- 2.46.2.windows.1