A community-based topic aggregation platform built on atproto
1package unfurl
2
3import (
4 "context"
5 "net/http"
6 "net/http/httptest"
7 "testing"
8 "time"
9
10 "github.com/stretchr/testify/assert"
11 "github.com/stretchr/testify/require"
12)
13
14func TestParseOpenGraph_ValidTags(t *testing.T) {
15 html := `
16<!DOCTYPE html>
17<html>
18<head>
19 <meta property="og:title" content="Test Article Title" />
20 <meta property="og:description" content="This is a test description" />
21 <meta property="og:image" content="https://example.com/image.jpg" />
22 <meta property="og:url" content="https://example.com/canonical" />
23</head>
24<body>
25 <p>Some content</p>
26</body>
27</html>
28`
29
30 og, err := parseOpenGraph(html)
31 require.NoError(t, err)
32
33 assert.Equal(t, "Test Article Title", og.Title)
34 assert.Equal(t, "This is a test description", og.Description)
35 assert.Equal(t, "https://example.com/image.jpg", og.Image)
36 assert.Equal(t, "https://example.com/canonical", og.URL)
37}
38
39func TestParseOpenGraph_MissingImage(t *testing.T) {
40 html := `
41<!DOCTYPE html>
42<html>
43<head>
44 <meta property="og:title" content="Article Without Image" />
45 <meta property="og:description" content="No image tag" />
46</head>
47<body></body>
48</html>
49`
50
51 og, err := parseOpenGraph(html)
52 require.NoError(t, err)
53
54 assert.Equal(t, "Article Without Image", og.Title)
55 assert.Equal(t, "No image tag", og.Description)
56 assert.Empty(t, og.Image, "Image should be empty when not provided")
57}
58
59func TestParseOpenGraph_FallbackToTitle(t *testing.T) {
60 html := `
61<!DOCTYPE html>
62<html>
63<head>
64 <title>Page Title Fallback</title>
65 <meta name="description" content="Meta description fallback" />
66</head>
67<body></body>
68</html>
69`
70
71 og, err := parseOpenGraph(html)
72 require.NoError(t, err)
73
74 assert.Equal(t, "Page Title Fallback", og.Title, "Should fall back to <title>")
75 assert.Equal(t, "Meta description fallback", og.Description, "Should fall back to meta description")
76}
77
78func TestParseOpenGraph_PreferOpenGraphOverFallback(t *testing.T) {
79 html := `
80<!DOCTYPE html>
81<html>
82<head>
83 <title>Page Title</title>
84 <meta name="description" content="Meta description" />
85 <meta property="og:title" content="OpenGraph Title" />
86 <meta property="og:description" content="OpenGraph Description" />
87</head>
88<body></body>
89</html>
90`
91
92 og, err := parseOpenGraph(html)
93 require.NoError(t, err)
94
95 assert.Equal(t, "OpenGraph Title", og.Title, "Should prefer og:title")
96 assert.Equal(t, "OpenGraph Description", og.Description, "Should prefer og:description")
97}
98
99func TestParseOpenGraph_MalformedHTML(t *testing.T) {
100 html := `
101<!DOCTYPE html>
102<html>
103<head>
104 <meta property="og:title" content="Still Works" />
105 <meta property="og:description" content="Even with broken tags
106</head>
107<body>
108 <p>Unclosed paragraph
109</body>
110`
111
112 og, err := parseOpenGraph(html)
113 require.NoError(t, err)
114
115 // Best-effort parsing should still extract what it can
116 assert.NotEmpty(t, og.Title, "Should extract title despite malformed HTML")
117}
118
119func TestParseOpenGraph_Empty(t *testing.T) {
120 html := `
121<!DOCTYPE html>
122<html>
123<head></head>
124<body></body>
125</html>
126`
127
128 og, err := parseOpenGraph(html)
129 require.NoError(t, err)
130
131 assert.Empty(t, og.Title)
132 assert.Empty(t, og.Description)
133 assert.Empty(t, og.Image)
134}
135
136func TestFetchOpenGraph_Success(t *testing.T) {
137 // Create test server with OpenGraph metadata
138 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
139 assert.Contains(t, r.Header.Get("User-Agent"), "CovesBot")
140
141 html := `
142<!DOCTYPE html>
143<html>
144<head>
145 <meta property="og:title" content="Test News Article" />
146 <meta property="og:description" content="Breaking news story" />
147 <meta property="og:image" content="https://example.com/news.jpg" />
148 <meta property="og:url" content="https://example.com/article/123" />
149</head>
150<body><p>Article content</p></body>
151</html>
152`
153 w.Header().Set("Content-Type", "text/html")
154 w.WriteHeader(http.StatusOK)
155 _, _ = w.Write([]byte(html))
156 }))
157 defer server.Close()
158
159 ctx := context.Background()
160 result, err := fetchOpenGraph(ctx, server.URL, 10*time.Second, "CovesBot/1.0")
161 require.NoError(t, err)
162 require.NotNil(t, result)
163
164 assert.Equal(t, "Test News Article", result.Title)
165 assert.Equal(t, "Breaking news story", result.Description)
166 assert.Equal(t, "https://example.com/news.jpg", result.ThumbnailURL)
167 assert.Equal(t, "article", result.Type)
168 assert.Equal(t, "opengraph", result.Provider)
169}
170
171func TestFetchOpenGraph_HTTPError(t *testing.T) {
172 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
173 w.WriteHeader(http.StatusNotFound)
174 }))
175 defer server.Close()
176
177 ctx := context.Background()
178 result, err := fetchOpenGraph(ctx, server.URL, 10*time.Second, "CovesBot/1.0")
179 require.Error(t, err)
180 assert.Nil(t, result)
181 assert.Contains(t, err.Error(), "404")
182}
183
184func TestFetchOpenGraph_Timeout(t *testing.T) {
185 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
186 time.Sleep(2 * time.Second)
187 w.WriteHeader(http.StatusOK)
188 }))
189 defer server.Close()
190
191 ctx := context.Background()
192 result, err := fetchOpenGraph(ctx, server.URL, 100*time.Millisecond, "CovesBot/1.0")
193 require.Error(t, err)
194 assert.Nil(t, result)
195}
196
197func TestFetchOpenGraph_NoMetadata(t *testing.T) {
198 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
199 html := `<html><head></head><body><p>No metadata</p></body></html>`
200 w.Header().Set("Content-Type", "text/html")
201 w.WriteHeader(http.StatusOK)
202 _, _ = w.Write([]byte(html))
203 }))
204 defer server.Close()
205
206 ctx := context.Background()
207 result, err := fetchOpenGraph(ctx, server.URL, 10*time.Second, "CovesBot/1.0")
208 require.NoError(t, err)
209 require.NotNil(t, result)
210
211 // Should still return a result with domain
212 assert.Equal(t, "article", result.Type)
213 assert.Equal(t, "opengraph", result.Provider)
214 assert.NotEmpty(t, result.Domain)
215}
216
217func TestIsOEmbedProvider(t *testing.T) {
218 tests := []struct {
219 url string
220 expected bool
221 }{
222 {"https://streamable.com/abc123", true},
223 {"https://www.youtube.com/watch?v=test", true},
224 {"https://youtu.be/test", true},
225 {"https://reddit.com/r/test/comments/123", true},
226 {"https://www.reddit.com/r/test/comments/123", true},
227 {"https://example.com/article", false},
228 {"https://news.ycombinator.com/item?id=123", false},
229 {"https://kite.kagi.com/search?q=test", false},
230 }
231
232 for _, tt := range tests {
233 t.Run(tt.url, func(t *testing.T) {
234 result := isOEmbedProvider(tt.url)
235 assert.Equal(t, tt.expected, result, "URL: %s", tt.url)
236 })
237 }
238}
239
240func TestIsSupported(t *testing.T) {
241 tests := []struct {
242 url string
243 expected bool
244 }{
245 {"https://example.com", true},
246 {"http://example.com", true},
247 {"https://news.site.com/article", true},
248 {"ftp://example.com", false},
249 {"not-a-url", false},
250 {"", false},
251 }
252
253 for _, tt := range tests {
254 t.Run(tt.url, func(t *testing.T) {
255 result := isSupported(tt.url)
256 assert.Equal(t, tt.expected, result, "URL: %s", tt.url)
257 })
258 }
259}
260
261func TestGetAttr(t *testing.T) {
262 html := `<meta property="og:title" content="Test Title" name="test" />`
263 doc, err := parseOpenGraph(html)
264 require.NoError(t, err)
265
266 // This is a simple test to verify the helper function works
267 // The actual usage is tested in the parseOpenGraph tests
268 assert.NotNil(t, doc)
269}