···
    "golang.org/x/net/html"
)

// Provider configuration
var oEmbedEndpoints = map[string]string{
    "streamable.com": "https://api.streamable.com/oembed",
    "youtube.com":    "https://www.youtube.com/oembed",
    "youtu.be":       "https://www.youtube.com/oembed",
    "reddit.com":     "https://www.reddit.com/oembed",
}
// oEmbedResponse represents a standard oEmbed response
type oEmbedResponse struct {
    ThumbnailURL    string `json:"thumbnail_url"`
    Version         string `json:"version"`
    Title           string `json:"title"`
    AuthorName      string `json:"author_name"`
    ProviderName    string `json:"provider_name"`
    ProviderURL     string `json:"provider_url"`
    Type            string `json:"type"`
    HTML            string `json:"html"`
    Description     string `json:"description"`
    ThumbnailWidth  int    `json:"thumbnail_width"`
    ThumbnailHeight int    `json:"thumbnail_height"`
    Width           int    `json:"width"`
    Height          int    `json:"height"`
}
// extractDomain extracts the domain from a URL
func extractDomain(urlStr string) string {
    parsed, err := url.Parse(urlStr)
    if err != nil {
        return ""
    }

    // Remove www. prefix
    domain := strings.TrimPrefix(parsed.Host, "www.")
    return domain
}

// isSupported checks if this is a valid HTTP/HTTPS URL
func isSupported(urlStr string) bool {
    parsed, err := url.Parse(urlStr)
    if err != nil {
        return false
    }

    scheme := strings.ToLower(parsed.Scheme)
    return scheme == "http" || scheme == "https"
}

// isOEmbedProvider checks if we have an oEmbed endpoint for this URL
func isOEmbedProvider(urlStr string) bool {
    domain := extractDomain(urlStr)
    _, exists := oEmbedEndpoints[domain]
    return exists
}
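// Illustrative sketch: the helpers above normalize away any "www." prefix, so
// www and bare hosts resolve to the same oEmbed endpoint. The function name
// exampleProviderLookup and the sample URLs are hypothetical.
func exampleProviderLookup() {
    fmt.Println(extractDomain("https://www.youtube.com/watch?v=abc")) // "youtube.com"
    fmt.Println(isOEmbedProvider("https://youtu.be/abc"))             // true
    fmt.Println(isSupported("ftp://example.org/file"))                // false: not http/https
}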
// fetchOEmbed fetches oEmbed data from the provider
func fetchOEmbed(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*oEmbedResponse, error) {
    domain := extractDomain(urlStr)
    endpoint, exists := oEmbedEndpoints[domain]
    if !exists {
        return nil, fmt.Errorf("no oEmbed endpoint for domain: %s", domain)
    }

    // Build oEmbed request URL
    oembedURL := fmt.Sprintf("%s?url=%s&format=json", endpoint, url.QueryEscape(urlStr))

    // Create HTTP request
    req, err := http.NewRequestWithContext(ctx, "GET", oembedURL, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create oEmbed request: %w", err)
    }

    req.Header.Set("User-Agent", userAgent)

    // Create HTTP client with timeout
    client := &http.Client{Timeout: timeout}
    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to fetch oEmbed data: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("oEmbed endpoint returned status %d", resp.StatusCode)
    }

    // Parse JSON response
    var oembed oEmbedResponse
    if err := json.NewDecoder(resp.Body).Decode(&oembed); err != nil {
        return nil, fmt.Errorf("failed to parse oEmbed response: %w", err)
    }

    return &oembed, nil
}
// mapOEmbedToResult converts oEmbed response to UnfurlResult
func mapOEmbedToResult(oembed *oEmbedResponse, originalURL string) *UnfurlResult {
    result := &UnfurlResult{
        URI:          originalURL,
        Title:        oembed.Title,
        Description:  oembed.Description,
        ThumbnailURL: oembed.ThumbnailURL,
        Provider:     strings.ToLower(oembed.ProviderName),
        Domain:       extractDomain(originalURL),
        Width:        oembed.Width,
        Height:       oembed.Height,
    }

    // Map oEmbed type to our embedType
    switch oembed.Type {
    case "video":
        result.Type = "video"
    case "photo":
        result.Type = "image"
    default:
        result.Type = "article"
    }

    // If no description but we have author name, use that
    if result.Description == "" && oembed.AuthorName != "" {
        result.Description = fmt.Sprintf("By %s", oembed.AuthorName)
    }

    return result
}
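// Illustrative sketch of how the oEmbed path above might be driven by a caller.
// unfurlViaOEmbed, the 10s timeout, and the user-agent string are hypothetical;
// isSupported, isOEmbedProvider, fetchOEmbed, and mapOEmbedToResult are the
// functions defined in this file.
func unfurlViaOEmbed(ctx context.Context, urlStr string) (*UnfurlResult, error) {
    if !isSupported(urlStr) || !isOEmbedProvider(urlStr) {
        return nil, fmt.Errorf("not an oEmbed-capable URL: %s", urlStr)
    }
    oembed, err := fetchOEmbed(ctx, urlStr, 10*time.Second, "example-unfurler/1.0")
    if err != nil {
        return nil, err
    }
    return mapOEmbedToResult(oembed, urlStr), nil
}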
// openGraphData represents OpenGraph metadata extracted from HTML
type openGraphData struct {
    Title       string
    Description string
    Image       string
    URL         string
}
// fetchOpenGraph fetches OpenGraph metadata from a URL
func fetchOpenGraph(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*UnfurlResult, error) {
    // Create HTTP request
    req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %w", err)
    }

    req.Header.Set("User-Agent", userAgent)

    // Create HTTP client with timeout
    client := &http.Client{Timeout: timeout}
    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to fetch URL: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("HTTP request returned status %d", resp.StatusCode)
    }

    // Read response body (limit to 10MB to prevent abuse)
    limitedReader := io.LimitReader(resp.Body, 10*1024*1024)
    body, err := io.ReadAll(limitedReader)
    if err != nil {
        return nil, fmt.Errorf("failed to read response body: %w", err)
    }

    // Parse OpenGraph metadata
    og, err := parseOpenGraph(string(body))
    if err != nil {
        return nil, fmt.Errorf("failed to parse OpenGraph metadata: %w", err)
    }

    // Build UnfurlResult
    result := &UnfurlResult{
        Type:         "article", // Default type for OpenGraph
        URI:          urlStr,
        Title:        og.Title,
        Description:  og.Description,
        ThumbnailURL: og.Image,
        Provider:     "opengraph",
        Domain:       extractDomain(urlStr),
    }

    // Use og:url if available and valid
    if og.URL != "" {
        result.URI = og.URL
    }

    return result, nil
}
// parseOpenGraph extracts OpenGraph metadata from HTML
func parseOpenGraph(htmlContent string) (*openGraphData, error) {
    og := &openGraphData{}
    doc, err := html.Parse(strings.NewReader(htmlContent))
    if err != nil {
        // Try best-effort parsing even with invalid HTML
        return og, nil
    }

    // Extract OpenGraph tags and fallbacks
    var pageTitle string
    var metaDescription string

    var traverse func(*html.Node)
    traverse = func(n *html.Node) {
        if n.Type == html.ElementNode {
            if n.Data == "meta" {
                property := getAttr(n, "property")
                name := getAttr(n, "name")
                content := getAttr(n, "content")

                if strings.HasPrefix(property, "og:") {
                    switch property {
                    case "og:title":
                        if og.Title == "" {
                            og.Title = content
                        }
                    case "og:description":
                        if og.Description == "" {
                            og.Description = content
                        }
                    case "og:image":
                        if og.Image == "" {
                            og.Image = content
                        }
                    case "og:url":
                        if og.URL == "" {
                            og.URL = content
                        }
                    }
                }

                // Fallback meta tags
                if name == "description" && metaDescription == "" {
                    metaDescription = content
                }
            }

            if n.Data == "title" {
                if pageTitle == "" && n.FirstChild != nil {
                    pageTitle = n.FirstChild.Data
                }
            }
        }

        for c := n.FirstChild; c != nil; c = c.NextSibling {
            traverse(c)
        }
    }
    traverse(doc)

    // Fall back to <title> and meta description when OpenGraph tags are missing
    if og.Title == "" {
        og.Title = pageTitle
    }
    if og.Description == "" {
        og.Description = metaDescription
    }

    return og, nil
}
// getAttr gets an attribute value from an HTML node
func getAttr(n *html.Node, key string) string {
    for _, attr := range n.Attr {
        if attr.Key == key {
            return attr.Val
        }
    }
    return ""
}
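// Illustrative sketch: running parseOpenGraph over a minimal HTML document to
// show the og:* extraction and the <title>/meta-description fallback. The
// sample markup and exampleParseOpenGraph are hypothetical.
func exampleParseOpenGraph() {
    sample := `<html><head>
        <title>Fallback Title</title>
        <meta name="description" content="Fallback description">
        <meta property="og:title" content="OG Title">
        <meta property="og:image" content="https://example.com/thumb.jpg">
    </head><body></body></html>`

    og, _ := parseOpenGraph(sample)
    // og:title wins over <title>; with no og:description present, the plain
    // meta description is used instead.
    fmt.Println(og.Title)       // "OG Title"
    fmt.Println(og.Description) // "Fallback description"
    fmt.Println(og.Image)       // "https://example.com/thumb.jpg"
}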
// fetchKagiKite handles special unfurling for Kagi Kite news pages.
// Kagi Kite pages use client-side rendering, so og:image tags aren't available at SSR time.
// Instead, we parse the HTML to extract the story image from the page content.
func fetchKagiKite(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*UnfurlResult, error) {
    // Create HTTP request
    req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %w", err)
    }

    req.Header.Set("User-Agent", userAgent)

    // Create HTTP client with timeout
    client := &http.Client{Timeout: timeout}
    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to fetch URL: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
    }

    // Limit response size to 10MB
    limitedReader := io.LimitReader(resp.Body, 10*1024*1024)

    doc, err := html.Parse(limitedReader)
    if err != nil {
        return nil, fmt.Errorf("failed to parse HTML: %w", err)
    }
    result := &UnfurlResult{
        URI:    urlStr,
        Type:   "article",
        Domain: "kite.kagi.com",
    }
    // First try OpenGraph tags (in case they get added in the future)
    var findOG func(*html.Node)
    findOG = func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "meta" {
            var property, content string
            for _, attr := range n.Attr {
                if attr.Key == "property" {
                    property = attr.Val
                } else if attr.Key == "content" {
                    content = attr.Val
                }
            }

            switch property {
            case "og:title":
                if result.Title == "" {
                    result.Title = content
                }
            case "og:description":
                if result.Description == "" {
                    result.Description = content
                }
            case "og:image":
                if result.ThumbnailURL == "" {
                    result.ThumbnailURL = content
                }
            }
        }

        for c := n.FirstChild; c != nil; c = c.NextSibling {
            findOG(c)
        }
    }
    findOG(doc)
    // Fallback: Extract from page content.
    // Look for images with kagiproxy.com URLs (Kagi's image proxy).
    // Note: Skip the first image as it's often a shared header/logo.
    if result.ThumbnailURL == "" {
        var images []struct {
            url string
            alt string
        }

        var findImg func(*html.Node)
        findImg = func(n *html.Node) {
            if n.Type == html.ElementNode && n.Data == "img" {
                for _, attr := range n.Attr {
                    if attr.Key == "src" && strings.Contains(attr.Val, "kagiproxy.com") {
                        // Get alt text if available
                        altText := ""
                        for _, a := range n.Attr {
                            if a.Key == "alt" {
                                altText = a.Val
                            }
                        }

                        images = append(images, struct {
                            url string
                            alt string
                        }{url: attr.Val, alt: altText})
                    }
                }
            }
            for c := n.FirstChild; c != nil; c = c.NextSibling {
                findImg(c)
            }
        }
        findImg(doc)

        // Skip first image (often shared header/logo), use second if available
        if len(images) > 1 {
            result.ThumbnailURL = images[1].url
            if result.Description == "" && images[1].alt != "" {
                result.Description = images[1].alt
            }
        } else if len(images) == 1 {
            // Only one image found, use it
            result.ThumbnailURL = images[0].url
            if result.Description == "" && images[0].alt != "" {
                result.Description = images[0].alt
            }
        }
    }
    // Fallback to <title> tag if og:title not found
    if result.Title == "" {
        var findTitle func(*html.Node) string
        findTitle = func(n *html.Node) string {
            if n.Type == html.ElementNode && n.Data == "title" {
                if n.FirstChild != nil && n.FirstChild.Type == html.TextNode {
                    return n.FirstChild.Data
                }
            }
            for c := n.FirstChild; c != nil; c = c.NextSibling {
                if title := findTitle(c); title != "" {
                    return title
                }
            }
            return ""
        }
        result.Title = findTitle(doc)
    }

    // If still no image, return error
    if result.ThumbnailURL == "" {
        return nil, fmt.Errorf("no image found in Kagi page")