1// heavily inspired by gitea's model (basically copy-pasted)
2package pulls_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 tlog "tangled.org/core/log"
24)
25
const (
	// pullIndexerAnalyzer is the name under which the custom analyzer is
	// registered in the index mapping; Search passes the same name to its
	// title/body match queries.
	pullIndexerAnalyzer = "pullIndexer"
	// pullIndexerDocType is the document type reported by pullData.Type and
	// the key under which the pull document mapping is registered.
	pullIndexerDocType  = "pullIndexerDocType"

	// unicodeNormalizeName is the name of the custom unicode-normalization
	// token filter.
	// NOTE(review): the value is spelled "uicodeNormalize". The same constant
	// is used both to register the filter and to reference it from the
	// analyzer, so the spelling is consistent within this file; changing it
	// would alter the name written into newly created index mappings.
	unicodeNormalizeName = "uicodeNormalize"
)
32
// Indexer wraps a bleve index that holds pull request documents.
type Indexer struct {
	// indexer is the underlying bleve index. It is nil until intialize has
	// opened or created the index.
	indexer bleve.Index
	// path is the location handed to bleve.Open / bleve.New.
	path    string
}
37
38func NewIndexer(indexDir string) *Indexer {
39 return &Indexer{
40 path: indexDir,
41 }
42}
43
44// Init initializes the indexer
45func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
46 l := tlog.FromContext(ctx)
47 existed, err := ix.intialize(ctx)
48 if err != nil {
49 log.Fatalln("failed to initialize pull indexer", err)
50 }
51 if !existed {
52 l.Debug("Populating the pull indexer")
53 err := PopulateIndexer(ctx, ix, e)
54 if err != nil {
55 log.Fatalln("failed to populate pull indexer", err)
56 }
57 }
58 l.Info("Initialized the pull indexer")
59}
60
61func generatePullIndexMapping() (mapping.IndexMapping, error) {
62 mapping := bleve.NewIndexMapping()
63 docMapping := bleve.NewDocumentMapping()
64
65 textFieldMapping := bleve.NewTextFieldMapping()
66 textFieldMapping.Store = false
67 textFieldMapping.IncludeInAll = false
68
69 keywordFieldMapping := bleve.NewKeywordFieldMapping()
70 keywordFieldMapping.Store = false
71 keywordFieldMapping.IncludeInAll = false
72
73 // numericFieldMapping := bleve.NewNumericFieldMapping()
74
75 docMapping.AddFieldMappingsAt("title", textFieldMapping)
76 docMapping.AddFieldMappingsAt("body", textFieldMapping)
77
78 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
79 docMapping.AddFieldMappingsAt("state", keywordFieldMapping)
80
81 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
82 "type": unicodenorm.Name,
83 "form": unicodenorm.NFC,
84 })
85 if err != nil {
86 return nil, err
87 }
88
89 err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{
90 "type": custom.Name,
91 "char_filters": []string{},
92 "tokenizer": unicode.Name,
93 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
94 })
95 if err != nil {
96 return nil, err
97 }
98
99 mapping.DefaultAnalyzer = pullIndexerAnalyzer
100 mapping.AddDocumentMapping(pullIndexerDocType, docMapping)
101 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
102 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
103
104 return mapping, nil
105}
106
107func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
108 if ix.indexer != nil {
109 return false, errors.New("indexer is already initialized")
110 }
111
112 indexer, err := openIndexer(ctx, ix.path)
113 if err != nil {
114 return false, err
115 }
116 if indexer != nil {
117 ix.indexer = indexer
118 return true, nil
119 }
120
121 mapping, err := generatePullIndexMapping()
122 if err != nil {
123 return false, err
124 }
125 indexer, err = bleve.New(ix.path, mapping)
126 if err != nil {
127 return false, err
128 }
129
130 ix.indexer = indexer
131
132 return false, nil
133}
134
135func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
136 l := tlog.FromContext(ctx)
137 indexer, err := bleve.Open(path)
138 if err != nil {
139 if errors.Is(err, upsidedown.IncompatibleVersion) {
140 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
141 return nil, os.RemoveAll(path)
142 }
143 return nil, nil
144 }
145 return indexer, nil
146}
147
148func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
149 l := tlog.FromContext(ctx)
150
151 pulls, err := db.GetPulls(e)
152 if err != nil {
153 return err
154 }
155 count := len(pulls)
156 err = ix.Index(ctx, pulls...)
157 if err != nil {
158 return err
159 }
160 l.Info("pulls indexed", "count", count)
161 return err
162}
163
// pullData is the document handed to bleve for each pull.
//
// The index mapping in this file declares explicit field mappings for
// "title" and "body" (text) and for "repo_at" and "state" (keyword). ID is
// also used, base36-encoded, as the bleve document ID.
type pullData struct {
	ID     int64  `json:"id"`
	RepoAt string `json:"repo_at"`
	PullID int    `json:"pull_id"`
	Title  string `json:"title"`
	Body   string `json:"body"`
	State  string `json:"state"`

	// Comments is left empty by makePullData in this file.
	Comments []pullCommentData `json:"comments"`
}
175
176func makePullData(pull *models.Pull) *pullData {
177 return &pullData{
178 ID: int64(pull.ID),
179 RepoAt: pull.RepoAt.String(),
180 PullID: pull.PullId,
181 Title: pull.Title,
182 Body: pull.Body,
183 State: pull.State.String(),
184 }
185}
186
187// Type returns the document type, for bleve's mapping.Classifier interface.
188func (i *pullData) Type() string {
189 return pullIndexerDocType
190}
191
// pullCommentData is the indexed form of a single pull comment; it is the
// element type of pullData.Comments.
type pullCommentData struct {
	Body string `json:"body"`
}
195
// searchResult is what Search returns.
type searchResult struct {
	// Hits holds the decoded (base36) document IDs of the hits on the
	// requested page, in the order bleve returned them.
	Hits  []int64
	// Total is the Total value reported by the bleve search result.
	Total uint64
}
200
// maxBatchSize is the size passed to bleveutil.NewFlushingBatch by Index.
// NOTE(review): presumably the number of queued operations after which the
// batch flushes itself — confirm against bleveutil.
const maxBatchSize = 20
202
203func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error {
204 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
205 for _, pull := range pulls {
206 pullData := makePullData(pull)
207 if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil {
208 return err
209 }
210 }
211 return batch.Flush()
212}
213
214func (ix *Indexer) Delete(ctx context.Context, pullID int64) error {
215 return ix.indexer.Delete(base36.Encode(pullID))
216}
217
218// Search searches for pulls
219func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) {
220 var queries []query.Query
221
222 // TODO(boltless): remove this after implementing pulls page pagination
223 limit := opts.Page.Limit
224 if limit == 0 {
225 limit = 500
226 }
227
228 if opts.Keyword != "" {
229 queries = append(queries, bleve.NewDisjunctionQuery(
230 bleveutil.MatchAndQuery("title", opts.Keyword, pullIndexerAnalyzer, 0),
231 bleveutil.MatchAndQuery("body", opts.Keyword, pullIndexerAnalyzer, 0),
232 ))
233 }
234 queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
235 queries = append(queries, bleveutil.KeywordFieldQuery("state", opts.State.String()))
236
237 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
238 searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false)
239 res, err := ix.indexer.SearchInContext(ctx, searchReq)
240 if err != nil {
241 return nil, nil
242 }
243 ret := &searchResult{
244 Total: res.Total,
245 Hits: make([]int64, len(res.Hits)),
246 }
247 for i, hit := range res.Hits {
248 id, err := base36.Decode(hit.ID)
249 if err != nil {
250 return nil, err
251 }
252 ret.Hits[i] = id
253 }
254 return ret, nil
255}