···
+
"golang.org/x/net/html"
+
// Provider configuration
//
// oEmbedEndpoints maps a provider domain to the oEmbed endpoint queried for
// URLs on that domain. Both isOEmbedProvider and fetchOEmbed index this map
// with the value returned by extractDomain, so keys are matched exactly.
// NOTE(review): the visible part of extractDomain only strips a leading
// "www.", so other subdomains (e.g. "m.youtube.com") would presumably miss
// every key here — confirm against extractDomain's full body.
+
var oEmbedEndpoints = map[string]string{
+
"streamable.com": "https://api.streamable.com/oembed",
+
"youtube.com": "https://www.youtube.com/oembed",
+
// youtu.be short links are served by the same endpoint as youtube.com.
"youtu.be": "https://www.youtube.com/oembed",
+
"reddit.com": "https://www.reddit.com/oembed",
+
// oEmbedResponse represents a standard oEmbed response
//
// fetchOEmbed decodes the provider's JSON body into this type with
// encoding/json, using the struct tags below as the key names. A key that is
// absent from the response leaves its field at the zero value ("" or 0).
+
type oEmbedResponse struct {
+
// Copied to UnfurlResult.ThumbnailURL by mapOEmbedToResult.
ThumbnailURL string `json:"thumbnail_url"`
+
Version string `json:"version"`
+
Title string `json:"title"`
+
// Used by mapOEmbedToResult to build a "By <author>" description when
// Description is empty.
AuthorName string `json:"author_name"`
+
// Lower-cased into UnfurlResult.Provider by mapOEmbedToResult.
ProviderName string `json:"provider_name"`
+
ProviderURL string `json:"provider_url"`
+
Type string `json:"type"`
+
HTML string `json:"html"`
+
Description string `json:"description"`
+
ThumbnailWidth int `json:"thumbnail_width"`
+
ThumbnailHeight int `json:"thumbnail_height"`
+
Width int `json:"width"`
+
Height int `json:"height"`
+
// extractDomain extracts the domain from a URL
//
// urlStr is parsed with url.Parse and a leading "www." is removed from the
// resulting host. The result is used as the lookup key into oEmbedEndpoints
// and as the Domain field of UnfurlResult.
//
// NOTE(review): no check of err is visible between the parse and the use of
// parsed.Host; url.Parse returns a nil *URL on failure, so confirm the elided
// lines guard against that before parsed is dereferenced.
// NOTE(review): url.URL.Host is "host" or "host:port", so a URL with an
// explicit port would presumably keep the port in the returned value —
// confirm whether callers expect that.
+
func extractDomain(urlStr string) string {
+
parsed, err := url.Parse(urlStr)
+
// Only the exact prefix "www." is stripped; any other subdomain is kept.
domain := strings.TrimPrefix(parsed.Host, "www.")
+
// isSupported checks if this is a valid HTTP/HTTPS URL
//
// It reports true when the parsed URL's scheme, compared case-insensitively,
// is "http" or "https". In the lines visible here only the scheme is
// inspected; the host is not examined.
// NOTE(review): the handling of the url.Parse error is not visible in this
// view — confirm it returns false before parsed is dereferenced.
+
func isSupported(urlStr string) bool {
+
parsed, err := url.Parse(urlStr)
+
// Normalise case so that e.g. "HTTP" and "Https" are accepted.
scheme := strings.ToLower(parsed.Scheme)
+
return scheme == "http" || scheme == "https"
+
// isOEmbedProvider checks if we have an oEmbed endpoint for this URL
//
// The URL's domain is obtained via extractDomain and looked up as an exact
// key in oEmbedEndpoints.
// NOTE(review): the return statement is not visible in this view; presumably
// it returns exists — confirm.
+
func isOEmbedProvider(urlStr string) bool {
+
domain := extractDomain(urlStr)
+
// Only key presence matters here; the endpoint value is discarded.
_, exists := oEmbedEndpoints[domain]
+
// fetchOEmbed fetches oEmbed data from the provider
//
// The endpoint is chosen from oEmbedEndpoints using extractDomain(urlStr).
// The request is a GET to "<endpoint>?url=<escaped urlStr>&format=json",
// bound to ctx, sent with the supplied User-Agent, and limited by timeout.
// Any status other than 200 OK is returned as an error; otherwise the body is
// decoded as JSON into an oEmbedResponse.
//
// Errors from request construction, transport and JSON decoding are wrapped
// with %w so callers can inspect the underlying cause.
// NOTE(review): the guards around the early error returns and the final
// success return are not visible in this view.
+
func fetchOEmbed(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*oEmbedResponse, error) {
+
domain := extractDomain(urlStr)
+
endpoint, exists := oEmbedEndpoints[domain]
+
return nil, fmt.Errorf("no oEmbed endpoint for domain: %s", domain)
+
// Build oEmbed request URL
// urlStr is query-escaped so its own "?" and "&" cannot leak into the
// endpoint's query string.
+
oembedURL := fmt.Sprintf("%s?url=%s&format=json", endpoint, url.QueryEscape(urlStr))
+
req, err := http.NewRequestWithContext(ctx, "GET", oembedURL, nil)
+
return nil, fmt.Errorf("failed to create oEmbed request: %w", err)
+
req.Header.Set("User-Agent", userAgent)
+
// Create HTTP client with timeout
// NOTE(review): a fresh http.Client is built on every call; consider
// whether a shared client would be preferable for connection reuse.
+
client := &http.Client{Timeout: timeout}
+
resp, err := client.Do(req)
+
return nil, fmt.Errorf("failed to fetch oEmbed data: %w", err)
+
// The error from Close is deliberately discarded.
defer func() { _ = resp.Body.Close() }()
+
if resp.StatusCode != http.StatusOK {
+
return nil, fmt.Errorf("oEmbed endpoint returned status %d", resp.StatusCode)
+
var oembed oEmbedResponse
+
// NOTE(review): the body is decoded directly with no size cap, whereas
// fetchOpenGraph wraps its body in io.LimitReader — confirm whether the
// same limit is wanted here.
if err := json.NewDecoder(resp.Body).Decode(&oembed); err != nil {
+
return nil, fmt.Errorf("failed to parse oEmbed response: %w", err)
+
// mapOEmbedToResult converts oEmbed response to UnfurlResult
//
// In the lines visible here it copies Description and ThumbnailURL as-is,
// stores the provider name lower-cased, and derives Domain from originalURL
// via extractDomain (not from the oEmbed response). When the response has no
// description but does have an author name, the description becomes
// "By <author>".
// NOTE(review): other fields of the result literal, the full type mapping and
// the return statement are not visible in this view.
+
func mapOEmbedToResult(oembed *oEmbedResponse, originalURL string) *UnfurlResult {
+
result := &UnfurlResult{
+
Description: oembed.Description,
+
ThumbnailURL: oembed.ThumbnailURL,
+
Provider: strings.ToLower(oembed.ProviderName),
+
Domain: extractDomain(originalURL),
+
// Map oEmbed type to our embedType
// NOTE(review): only the "article" assignment is visible; presumably it is
// one branch (or the default) of a mapping on oembed.Type — confirm.
+
result.Type = "article"
+
// If no description but we have author name, use that
+
if result.Description == "" && oembed.AuthorName != "" {
+
result.Description = fmt.Sprintf("By %s", oembed.AuthorName)
+
// openGraphData represents OpenGraph metadata extracted from HTML
//
// It is produced by parseOpenGraph and consumed by fetchOpenGraph, which
// reads at least its Description and Image fields.
// NOTE(review): the field list is not visible in this view.
+
type openGraphData struct {
+
// fetchOpenGraph fetches OpenGraph metadata from a URL
//
// It issues a GET for urlStr bound to ctx, with the supplied User-Agent and
// an overall client timeout. Any status other than 200 OK is an error.
// At most 10MB of the body is read, parsed with parseOpenGraph, and mapped
// onto an UnfurlResult whose Type is "article" and whose Domain comes from
// extractDomain(urlStr).
//
// Errors from request construction, transport, body reading and parsing are
// wrapped with %w.
// NOTE(review): the guards around the early error returns, the remaining
// result fields, the og:url handling and the final return are not visible in
// this view.
+
func fetchOpenGraph(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*UnfurlResult, error) {
+
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
+
return nil, fmt.Errorf("failed to create request: %w", err)
+
req.Header.Set("User-Agent", userAgent)
+
// Create HTTP client with timeout
// NOTE(review): a fresh http.Client is built on every call; consider
// whether a shared client would be preferable for connection reuse.
+
client := &http.Client{Timeout: timeout}
+
resp, err := client.Do(req)
+
return nil, fmt.Errorf("failed to fetch URL: %w", err)
+
// The error from Close is deliberately discarded.
defer func() { _ = resp.Body.Close() }()
+
if resp.StatusCode != http.StatusOK {
+
return nil, fmt.Errorf("HTTP request returned status %d", resp.StatusCode)
+
// Read response body (limit to 10MB to prevent abuse)
// io.LimitReader reports EOF once the limit is reached, so a larger page is
// silently truncated rather than rejected.
+
limitedReader := io.LimitReader(resp.Body, 10*1024*1024)
+
body, err := io.ReadAll(limitedReader)
+
return nil, fmt.Errorf("failed to read response body: %w", err)
+
// Parse OpenGraph metadata
// The string conversion copies the body because parseOpenGraph takes a
// string.
+
og, err := parseOpenGraph(string(body))
+
return nil, fmt.Errorf("failed to parse OpenGraph metadata: %w", err)
+
result := &UnfurlResult{
+
Type: "article", // Default type for OpenGraph
+
Description: og.Description,
+
ThumbnailURL: og.Image,
+
Domain: extractDomain(urlStr),
+
// Use og:url if available and valid
+
// parseOpenGraph extracts OpenGraph metadata from HTML
//
// The document is parsed with html.Parse and walked depth-first by the
// recursive traverse closure. For element nodes it reads the "property",
// "name" and "content" attributes via getAttr. Values are first-wins:
// og.Description, the plain meta description and the page title are each
// only set while still empty. After the walk, an empty og.Description falls
// back to the plain meta description.
// NOTE(review): the declarations of og and pageTitle, the per-property
// dispatch, the element-name checks and the return are not visible in this
// view.
+
func parseOpenGraph(htmlContent string) (*openGraphData, error) {
+
doc, err := html.Parse(strings.NewReader(htmlContent))
+
// Try best-effort parsing even with invalid HTML
+
// Extract OpenGraph tags and fallbacks
+
// Holds <meta name="description"> content, used only as a fallback.
var metaDescription string
+
// Declared before assignment so the closure can call itself recursively.
var traverse func(*html.Node)
+
traverse = func(n *html.Node) {
+
if n.Type == html.ElementNode {
+
property := getAttr(n, "property")
+
name := getAttr(n, "name")
+
content := getAttr(n, "content")
+
if strings.HasPrefix(property, "og:") {
+
if og.Description == "" {
+
og.Description = content
+
if name == "description" && metaDescription == "" {
+
metaDescription = content
+
// NOTE(review): FirstChild.Data is read without a visible check that the
// child is a text node; for a non-text child Data would be a tag name —
// confirm the elided condition restricts this to <title>.
if pageTitle == "" && n.FirstChild != nil {
+
pageTitle = n.FirstChild.Data
+
// Walk all children of the current node.
for c := n.FirstChild; c != nil; c = c.NextSibling {
+
// Fall back to the plain meta description when no OpenGraph one was found.
if og.Description == "" {
+
og.Description = metaDescription
+
// getAttr gets an attribute value from an HTML node
//
// It scans n.Attr in order for an attribute identified by key.
// NOTE(review): the comparison and return statements are not visible in this
// view; presumably it returns the first matching attribute's value and ""
// when none matches — confirm, since parseOpenGraph compares the results
// against string literals.
+
func getAttr(n *html.Node, key string) string {
+
for _, attr := range n.Attr {
+
// fetchKagiKite handles special unfurling for Kagi Kite news pages
+
// Kagi Kite pages use client-side rendering, so og:image tags aren't available at SSR time
+
// Instead, we parse the HTML to extract the story image from the page content
+
func fetchKagiKite(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*UnfurlResult, error) {
+
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
+
return nil, fmt.Errorf("failed to create request: %w", err)
+
req.Header.Set("User-Agent", userAgent)
+
// Create HTTP client with timeout
+
client := &http.Client{Timeout: timeout}
+
resp, err := client.Do(req)
+
return nil, fmt.Errorf("failed to fetch URL: %w", err)
+
defer func() { _ = resp.Body.Close() }()
+
if resp.StatusCode != http.StatusOK {
+
return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
+
// Limit response size to 10MB
+
limitedReader := io.LimitReader(resp.Body, 10*1024*1024)
+
doc, err := html.Parse(limitedReader)
+
return nil, fmt.Errorf("failed to parse HTML: %w", err)
+
result := &UnfurlResult{
+
Domain: "kite.kagi.com",
+
// First try OpenGraph tags (in case they get added in the future)
+
var findOG func(*html.Node)
+
findOG = func(n *html.Node) {
+
if n.Type == html.ElementNode && n.Data == "meta" {
+
var property, content string
+
for _, attr := range n.Attr {
+
if attr.Key == "property" {
+
} else if attr.Key == "content" {
+
if result.Title == "" {
+
if result.Description == "" {
+
result.Description = content
+
if result.ThumbnailURL == "" {
+
result.ThumbnailURL = content
+
for c := n.FirstChild; c != nil; c = c.NextSibling {
+
// Fallback: Extract from page content
+
// Look for images with kagiproxy.com URLs (Kagi's image proxy)
+
// Note: Skip the first image as it's often a shared header/logo
+
if result.ThumbnailURL == "" {
+
var findImg func(*html.Node)
+
findImg = func(n *html.Node) {
+
if n.Type == html.ElementNode && n.Data == "img" {
+
for _, attr := range n.Attr {
+
if attr.Key == "src" && strings.Contains(attr.Val, "kagiproxy.com") {
+
// Get alt text if available
+
for _, a := range n.Attr {
+
images = append(images, struct {
+
}{url: attr.Val, alt: altText})
+
for c := n.FirstChild; c != nil; c = c.NextSibling {
+
// Skip first image (often shared header/logo), use second if available
+
result.ThumbnailURL = images[1].url
+
if result.Description == "" && images[1].alt != "" {
+
result.Description = images[1].alt
+
} else if len(images) == 1 {
+
// Only one image found, use it
+
result.ThumbnailURL = images[0].url
+
if result.Description == "" && images[0].alt != "" {
+
result.Description = images[0].alt
+
// Fallback to <title> tag if og:title not found
+
if result.Title == "" {
+
var findTitle func(*html.Node) string
+
findTitle = func(n *html.Node) string {
+
if n.Type == html.ElementNode && n.Data == "title" {
+
if n.FirstChild != nil && n.FirstChild.Type == html.TextNode {
+
return n.FirstChild.Data
+
for c := n.FirstChild; c != nil; c = c.NextSibling {
+
if title := findTitle(c); title != "" {
+
result.Title = findTitle(doc)
+
// If still no image, return error
+
if result.ThumbnailURL == "" {
+
return nil, fmt.Errorf("no image found in Kagi page")