···
    "golang.org/x/net/html"
)

// Provider configuration
var oEmbedEndpoints = map[string]string{
    "streamable.com": "https://api.streamable.com/oembed",
    "youtube.com":    "https://www.youtube.com/oembed",
    "youtu.be":       "https://www.youtube.com/oembed",
    "reddit.com":     "https://www.reddit.com/oembed",
}
// oEmbedResponse represents a standard oEmbed response
type oEmbedResponse struct {
    ThumbnailURL    string `json:"thumbnail_url"`
    Version         string `json:"version"`
    Title           string `json:"title"`
    AuthorName      string `json:"author_name"`
    ProviderName    string `json:"provider_name"`
    ProviderURL     string `json:"provider_url"`
    Type            string `json:"type"`
    HTML            string `json:"html"`
    Description     string `json:"description"`
    ThumbnailWidth  int    `json:"thumbnail_width"`
    ThumbnailHeight int    `json:"thumbnail_height"`
    Width           int    `json:"width"`
    Height          int    `json:"height"`
}
// extractDomain extracts the domain from a URL
func extractDomain(urlStr string) string {
    parsed, err := url.Parse(urlStr)
    if err != nil {
        return ""
    }

    // Remove www. prefix
    domain := strings.TrimPrefix(parsed.Host, "www.")
    return domain
}

// isSupported checks if this is a valid HTTP/HTTPS URL
func isSupported(urlStr string) bool {
    parsed, err := url.Parse(urlStr)
    if err != nil {
        return false
    }

    scheme := strings.ToLower(parsed.Scheme)
    return scheme == "http" || scheme == "https"
}

// isOEmbedProvider checks if we have an oEmbed endpoint for this URL
func isOEmbedProvider(urlStr string) bool {
    domain := extractDomain(urlStr)
    _, exists := oEmbedEndpoints[domain]
    return exists
}
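// Illustrative sketch: the helpers above normalize away any "www." prefix, so
// www and bare hosts resolve to the same oEmbed endpoint. The function name
// exampleProviderLookup and the sample URLs are hypothetical.
func exampleProviderLookup() {
    fmt.Println(extractDomain("https://www.youtube.com/watch?v=abc")) // "youtube.com"
    fmt.Println(isOEmbedProvider("https://youtu.be/abc"))             // true
    fmt.Println(isSupported("ftp://example.org/file"))                // false: not http/https
}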
// fetchOEmbed fetches oEmbed data from the provider
func fetchOEmbed(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*oEmbedResponse, error) {
    domain := extractDomain(urlStr)
    endpoint, exists := oEmbedEndpoints[domain]
    if !exists {
        return nil, fmt.Errorf("no oEmbed endpoint for domain: %s", domain)
    }

    // Build oEmbed request URL
    oembedURL := fmt.Sprintf("%s?url=%s&format=json", endpoint, url.QueryEscape(urlStr))

    // Create HTTP request
    req, err := http.NewRequestWithContext(ctx, "GET", oembedURL, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create oEmbed request: %w", err)
    }

    req.Header.Set("User-Agent", userAgent)

    // Create HTTP client with timeout
    client := &http.Client{Timeout: timeout}
    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to fetch oEmbed data: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("oEmbed endpoint returned status %d", resp.StatusCode)
    }

    // Parse JSON response
    var oembed oEmbedResponse
    if err := json.NewDecoder(resp.Body).Decode(&oembed); err != nil {
        return nil, fmt.Errorf("failed to parse oEmbed response: %w", err)
    }

    return &oembed, nil
}
// mapOEmbedToResult converts oEmbed response to UnfurlResult
func mapOEmbedToResult(oembed *oEmbedResponse, originalURL string) *UnfurlResult {
    result := &UnfurlResult{
        URI:          originalURL,
        Title:        oembed.Title,
        Description:  oembed.Description,
        ThumbnailURL: oembed.ThumbnailURL,
        Provider:     strings.ToLower(oembed.ProviderName),
        Domain:       extractDomain(originalURL),
        Width:        oembed.Width,
        Height:       oembed.Height,
    }

    // Map oEmbed type to our embedType
    switch oembed.Type {
    case "video":
        result.Type = "video"
    case "photo":
        result.Type = "image"
    default:
        result.Type = "article"
    }

    // If no description but we have author name, use that
    if result.Description == "" && oembed.AuthorName != "" {
        result.Description = fmt.Sprintf("By %s", oembed.AuthorName)
    }

    return result
}
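// Illustrative sketch of how the oEmbed path above might be driven by a caller.
// unfurlViaOEmbed, the 10s timeout, and the user-agent string are hypothetical;
// isSupported, isOEmbedProvider, fetchOEmbed, and mapOEmbedToResult are the
// functions defined in this file.
func unfurlViaOEmbed(ctx context.Context, urlStr string) (*UnfurlResult, error) {
    if !isSupported(urlStr) || !isOEmbedProvider(urlStr) {
        return nil, fmt.Errorf("not an oEmbed-capable URL: %s", urlStr)
    }
    oembed, err := fetchOEmbed(ctx, urlStr, 10*time.Second, "example-unfurler/1.0")
    if err != nil {
        return nil, err
    }
    return mapOEmbedToResult(oembed, urlStr), nil
}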
// openGraphData represents OpenGraph metadata extracted from HTML
type openGraphData struct {
    Title       string
    Description string
    Image       string
    URL         string
}
// fetchOpenGraph fetches OpenGraph metadata from a URL
func fetchOpenGraph(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*UnfurlResult, error) {
    // Create HTTP request
    req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %w", err)
    }

    req.Header.Set("User-Agent", userAgent)

    // Create HTTP client with timeout
    client := &http.Client{Timeout: timeout}
    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to fetch URL: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("HTTP request returned status %d", resp.StatusCode)
    }

    // Read response body (limit to 10MB to prevent abuse)
    limitedReader := io.LimitReader(resp.Body, 10*1024*1024)
    body, err := io.ReadAll(limitedReader)
    if err != nil {
        return nil, fmt.Errorf("failed to read response body: %w", err)
    }

    // Parse OpenGraph metadata
    og, err := parseOpenGraph(string(body))
    if err != nil {
        return nil, fmt.Errorf("failed to parse OpenGraph metadata: %w", err)
    }

    // Build UnfurlResult
    result := &UnfurlResult{
        Type:         "article", // Default type for OpenGraph
        URI:          urlStr,
        Title:        og.Title,
        Description:  og.Description,
        ThumbnailURL: og.Image,
        Provider:     "opengraph",
        Domain:       extractDomain(urlStr),
    }

    // Use og:url if available and valid
    if og.URL != "" {
        result.URI = og.URL
    }

    return result, nil
}
// parseOpenGraph extracts OpenGraph metadata from HTML
func parseOpenGraph(htmlContent string) (*openGraphData, error) {
    og := &openGraphData{}
    doc, err := html.Parse(strings.NewReader(htmlContent))
    if err != nil {
        // Try best-effort parsing even with invalid HTML
        return og, nil
    }

    // Extract OpenGraph tags and fallbacks
    var pageTitle string
    var metaDescription string

    var traverse func(*html.Node)
    traverse = func(n *html.Node) {
        if n.Type == html.ElementNode {
            if n.Data == "meta" {
                property := getAttr(n, "property")
                name := getAttr(n, "name")
                content := getAttr(n, "content")

                if strings.HasPrefix(property, "og:") {
                    switch property {
                    case "og:title":
                        if og.Title == "" {
                            og.Title = content
                        }
                    case "og:description":
                        if og.Description == "" {
                            og.Description = content
                        }
                    case "og:image":
                        if og.Image == "" {
                            og.Image = content
                        }
                    case "og:url":
                        if og.URL == "" {
                            og.URL = content
                        }
                    }
                }

                // Fallback meta tags
                if name == "description" && metaDescription == "" {
                    metaDescription = content
                }
            }

            if n.Data == "title" {
                if pageTitle == "" && n.FirstChild != nil {
                    pageTitle = n.FirstChild.Data
                }
            }
        }

        for c := n.FirstChild; c != nil; c = c.NextSibling {
            traverse(c)
        }
    }
    traverse(doc)

    // Fall back to <title> and meta description when OpenGraph tags are missing
    if og.Title == "" {
        og.Title = pageTitle
    }
    if og.Description == "" {
        og.Description = metaDescription
    }

    return og, nil
}
// getAttr gets an attribute value from an HTML node
func getAttr(n *html.Node, key string) string {
    for _, attr := range n.Attr {
        if attr.Key == key {
            return attr.Val
        }
    }
    return ""
}
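// Illustrative sketch: running parseOpenGraph over a minimal HTML document to
// show the og:* extraction and the <title>/meta-description fallback. The
// sample markup and exampleParseOpenGraph are hypothetical.
func exampleParseOpenGraph() {
    sample := `<html><head>
        <title>Fallback Title</title>
        <meta name="description" content="Fallback description">
        <meta property="og:title" content="OG Title">
        <meta property="og:image" content="https://example.com/thumb.jpg">
    </head><body></body></html>`

    og, _ := parseOpenGraph(sample)
    // og:title wins over <title>; with no og:description present, the plain
    // meta description is used instead.
    fmt.Println(og.Title)       // "OG Title"
    fmt.Println(og.Description) // "Fallback description"
    fmt.Println(og.Image)       // "https://example.com/thumb.jpg"
}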
// fetchKagiKite handles special unfurling for Kagi Kite news pages.
// Kagi Kite pages use client-side rendering, so og:image tags aren't available at SSR time.
// Instead, we parse the HTML to extract the story image from the page content.
func fetchKagiKite(ctx context.Context, urlStr string, timeout time.Duration, userAgent string) (*UnfurlResult, error) {
    // Create HTTP request
    req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %w", err)
    }

    req.Header.Set("User-Agent", userAgent)

    // Create HTTP client with timeout
    client := &http.Client{Timeout: timeout}
    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to fetch URL: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
    }

    // Limit response size to 10MB
    limitedReader := io.LimitReader(resp.Body, 10*1024*1024)

    doc, err := html.Parse(limitedReader)
    if err != nil {
        return nil, fmt.Errorf("failed to parse HTML: %w", err)
    }
    result := &UnfurlResult{
        URI:    urlStr,
        Type:   "article",
        Domain: "kite.kagi.com",
    }
    // First try OpenGraph tags (in case they get added in the future)
    var findOG func(*html.Node)
    findOG = func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "meta" {
            var property, content string
            for _, attr := range n.Attr {
                if attr.Key == "property" {
                    property = attr.Val
                } else if attr.Key == "content" {
                    content = attr.Val
                }
            }

            switch property {
            case "og:title":
                if result.Title == "" {
                    result.Title = content
                }
            case "og:description":
                if result.Description == "" {
                    result.Description = content
                }
            case "og:image":
                if result.ThumbnailURL == "" {
                    result.ThumbnailURL = content
                }
            }
        }

        for c := n.FirstChild; c != nil; c = c.NextSibling {
            findOG(c)
        }
    }
    findOG(doc)
    // Fallback: Extract from page content.
    // Look for images with kagiproxy.com URLs (Kagi's image proxy).
    // Note: Skip the first image as it's often a shared header/logo.
    if result.ThumbnailURL == "" {
        var images []struct {
            url string
            alt string
        }

        var findImg func(*html.Node)
        findImg = func(n *html.Node) {
            if n.Type == html.ElementNode && n.Data == "img" {
                for _, attr := range n.Attr {
                    if attr.Key == "src" && strings.Contains(attr.Val, "kagiproxy.com") {
                        // Get alt text if available
                        altText := ""
                        for _, a := range n.Attr {
                            if a.Key == "alt" {
                                altText = a.Val
                            }
                        }

                        images = append(images, struct {
                            url string
                            alt string
                        }{url: attr.Val, alt: altText})
                    }
                }
            }
            for c := n.FirstChild; c != nil; c = c.NextSibling {
                findImg(c)
            }
        }
        findImg(doc)

        // Skip first image (often shared header/logo), use second if available
        if len(images) > 1 {
            result.ThumbnailURL = images[1].url
            if result.Description == "" && images[1].alt != "" {
                result.Description = images[1].alt
            }
        } else if len(images) == 1 {
            // Only one image found, use it
            result.ThumbnailURL = images[0].url
            if result.Description == "" && images[0].alt != "" {
                result.Description = images[0].alt
            }
        }
    }
    // Fallback to <title> tag if og:title not found
    if result.Title == "" {
        var findTitle func(*html.Node) string
        findTitle = func(n *html.Node) string {
            if n.Type == html.ElementNode && n.Data == "title" {
                if n.FirstChild != nil && n.FirstChild.Type == html.TextNode {
                    return n.FirstChild.Data
                }
            }
            for c := n.FirstChild; c != nil; c = c.NextSibling {
                if title := findTitle(c); title != "" {
                    return title
                }
            }
            return ""
        }
        result.Title = findTitle(doc)
    }

    // If still no image, return error
    if result.ThumbnailURL == "" {
        return nil, fmt.Errorf("no image found in Kagi page")