1// heavily inspired by gitea's model (basically copy-pasted)
2package issues_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 "tangled.org/core/appview/pagination"
24 tlog "tangled.org/core/log"
25)
26
// Names registered with the bleve index mapping of the issue indexer.
const (
	// issueIndexerAnalyzer is the name under which the custom analyzer is
	// registered; it is also the mapping's default analyzer and the analyzer
	// named in keyword searches.
	issueIndexerAnalyzer = "issueIndexer"
	// issueIndexerDocType is the document type reported by issueData.Type and
	// the name the issue document mapping is registered under.
	issueIndexerDocType = "issueIndexerDocType"

	// unicodeNormalizeName is the name of the custom unicode-normalization
	// token filter.
	// NOTE(review): the value is spelled "uicodeNormalize" (sic); it is
	// presumably persisted with the index mapping, so confirm before renaming.
	unicodeNormalizeName = "uicodeNormalize"
)
33
34type Indexer struct {
35 indexer bleve.Index
36 path string
37}
38
39func NewIndexer(indexDir string) *Indexer {
40 return &Indexer{
41 path: indexDir,
42 }
43}
44
45// Init initializes the indexer
46func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
47 l := tlog.FromContext(ctx)
48 existed, err := ix.intialize(ctx)
49 if err != nil {
50 log.Fatalln("failed to initialize issue indexer", err)
51 }
52 if !existed {
53 l.Debug("Populating the issue indexer")
54 err := PopulateIndexer(ctx, ix, e)
55 if err != nil {
56 log.Fatalln("failed to populate issue indexer", err)
57 }
58 }
59
60 count, _ := ix.indexer.DocCount()
61 l.Info("Initialized the issue indexer", "docCount", count)
62}
63
64func generateIssueIndexMapping() (mapping.IndexMapping, error) {
65 mapping := bleve.NewIndexMapping()
66 docMapping := bleve.NewDocumentMapping()
67
68 textFieldMapping := bleve.NewTextFieldMapping()
69 textFieldMapping.Store = false
70 textFieldMapping.IncludeInAll = false
71
72 boolFieldMapping := bleve.NewBooleanFieldMapping()
73 boolFieldMapping.Store = false
74 boolFieldMapping.IncludeInAll = false
75
76 keywordFieldMapping := bleve.NewKeywordFieldMapping()
77 keywordFieldMapping.Store = false
78 keywordFieldMapping.IncludeInAll = false
79
80 // numericFieldMapping := bleve.NewNumericFieldMapping()
81
82 docMapping.AddFieldMappingsAt("title", textFieldMapping)
83 docMapping.AddFieldMappingsAt("body", textFieldMapping)
84
85 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
86 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
87
88 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
89 "type": unicodenorm.Name,
90 "form": unicodenorm.NFC,
91 })
92 if err != nil {
93 return nil, err
94 }
95
96 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
97 "type": custom.Name,
98 "char_filters": []string{},
99 "tokenizer": unicode.Name,
100 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
101 })
102 if err != nil {
103 return nil, err
104 }
105
106 mapping.DefaultAnalyzer = issueIndexerAnalyzer
107 mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
108 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
109 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
110
111 return mapping, nil
112}
113
114func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
115 if ix.indexer != nil {
116 return false, errors.New("indexer is already initialized")
117 }
118
119 indexer, err := openIndexer(ctx, ix.path)
120 if err != nil {
121 return false, err
122 }
123 if indexer != nil {
124 ix.indexer = indexer
125 return true, nil
126 }
127
128 mapping, err := generateIssueIndexMapping()
129 if err != nil {
130 return false, err
131 }
132 indexer, err = bleve.New(ix.path, mapping)
133 if err != nil {
134 return false, err
135 }
136
137 ix.indexer = indexer
138
139 return false, nil
140}
141
142func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
143 l := tlog.FromContext(ctx)
144 indexer, err := bleve.Open(path)
145 if err != nil {
146 if errors.Is(err, upsidedown.IncompatibleVersion) {
147 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
148 return nil, os.RemoveAll(path)
149 }
150 return nil, nil
151 }
152 return indexer, nil
153}
154
155func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
156 l := tlog.FromContext(ctx)
157 count := 0
158 err := pagination.IterateAll(
159 func(page pagination.Page) ([]models.Issue, error) {
160 return db.GetIssuesPaginated(e, page)
161 },
162 func(issues []models.Issue) error {
163 count += len(issues)
164 return ix.Index(ctx, issues...)
165 },
166 )
167 l.Info("issues indexed", "count", count)
168 return err
169}
170
171// issueData data stored and will be indexed
172type issueData struct {
173 ID int64 `json:"id"`
174 RepoAt string `json:"repo_at"`
175 IssueID int `json:"issue_id"`
176 Title string `json:"title"`
177 Body string `json:"body"`
178
179 IsOpen bool `json:"is_open"`
180 Comments []IssueCommentData `json:"comments"`
181}
182
183func makeIssueData(issue *models.Issue) *issueData {
184 return &issueData{
185 ID: issue.Id,
186 RepoAt: issue.RepoAt.String(),
187 IssueID: issue.IssueId,
188 Title: issue.Title,
189 Body: issue.Body,
190 IsOpen: issue.Open,
191 }
192}
193
194// Type returns the document type, for bleve's mapping.Classifier interface.
195func (i *issueData) Type() string {
196 return issueIndexerDocType
197}
198
// IssueCommentData is the index representation of one issue comment.
type IssueCommentData struct {
	// Body is the comment text.
	Body string `json:"body"`
}
202
// SearchResult is the outcome of Indexer.Search.
type SearchResult struct {
	// Hits holds the decoded issue ids of the returned hits, in the order
	// the index reported them.
	Hits []int64
	// Total is the total reported by the index for the query.
	Total uint64
}
207
// maxBatchSize is the batch size handed to the flushing batch used by
// Indexer.Index.
const maxBatchSize = 20
209
210func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error {
211 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
212 for _, issue := range issues {
213 issueData := makeIssueData(&issue)
214 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil {
215 return err
216 }
217 }
218 return batch.Flush()
219}
220
221func (ix *Indexer) Delete(ctx context.Context, issueId int64) error {
222 return ix.indexer.Delete(base36.Encode(issueId))
223}
224
225// Search searches for issues
226func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) {
227 var queries []query.Query
228
229 if opts.Keyword != "" {
230 queries = append(queries, bleve.NewDisjunctionQuery(
231 bleveutil.MatchAndQuery("title", opts.Keyword, issueIndexerAnalyzer, 0),
232 bleveutil.MatchAndQuery("body", opts.Keyword, issueIndexerAnalyzer, 0),
233 ))
234 }
235 queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
236 queries = append(queries, bleveutil.BoolFieldQuery("is_open", opts.IsOpen))
237 // TODO: append more queries
238
239 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
240 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false)
241 res, err := ix.indexer.SearchInContext(ctx, searchReq)
242 if err != nil {
243 return nil, nil
244 }
245 ret := &SearchResult{
246 Total: res.Total,
247 Hits: make([]int64, len(res.Hits)),
248 }
249 for i, hit := range res.Hits {
250 id, err := base36.Decode(hit.ID)
251 if err != nil {
252 return nil, err
253 }
254 ret.Hits[i] = id
255 }
256 return ret, nil
257}