···
1
+
// heavily inspired by Gitea's model (basically copy-pasted)
2
+
package pulls_indexer
10
+
"github.com/blevesearch/bleve/v2"
11
+
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12
+
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13
+
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14
+
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15
+
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16
+
"github.com/blevesearch/bleve/v2/index/upsidedown"
17
+
"github.com/blevesearch/bleve/v2/mapping"
18
+
"github.com/blevesearch/bleve/v2/search/query"
19
+
"tangled.org/core/appview/db"
20
+
"tangled.org/core/appview/indexer/base36"
21
+
"tangled.org/core/appview/indexer/bleve"
22
+
"tangled.org/core/appview/models"
23
+
tlog "tangled.org/core/log"
27
+
pullIndexerAnalyzer = "pullIndexer"
28
+
pullIndexerDocType = "pullIndexerDocType"
30
+
unicodeNormalizeName = "uicodeNormalize"
33
+
type Indexer struct {
38
+
func NewIndexer(indexDir string) *Indexer {
44
+
// Init initializes the indexer
45
+
func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
46
+
l := tlog.FromContext(ctx)
47
+
existed, err := ix.intialize(ctx)
49
+
log.Fatalln("failed to initialize pull indexer", err)
52
+
l.Debug("Populating the pull indexer")
53
+
err := PopulateIndexer(ctx, ix, e)
55
+
log.Fatalln("failed to populate pull indexer", err)
58
+
l.Info("Initialized the pull indexer")
61
+
func generatePullIndexMapping() (mapping.IndexMapping, error) {
62
+
mapping := bleve.NewIndexMapping()
63
+
docMapping := bleve.NewDocumentMapping()
65
+
textFieldMapping := bleve.NewTextFieldMapping()
66
+
textFieldMapping.Store = false
67
+
textFieldMapping.IncludeInAll = false
69
+
keywordFieldMapping := bleve.NewKeywordFieldMapping()
70
+
keywordFieldMapping.Store = false
71
+
keywordFieldMapping.IncludeInAll = false
73
+
// numericFieldMapping := bleve.NewNumericFieldMapping()
75
+
docMapping.AddFieldMappingsAt("title", textFieldMapping)
76
+
docMapping.AddFieldMappingsAt("body", textFieldMapping)
78
+
docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
79
+
docMapping.AddFieldMappingsAt("state", keywordFieldMapping)
81
+
err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
82
+
"type": unicodenorm.Name,
83
+
"form": unicodenorm.NFC,
89
+
err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{
90
+
"type": custom.Name,
91
+
"char_filters": []string{},
92
+
"tokenizer": unicode.Name,
93
+
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
99
+
mapping.DefaultAnalyzer = pullIndexerAnalyzer
100
+
mapping.AddDocumentMapping(pullIndexerDocType, docMapping)
101
+
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
102
+
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
104
+
return mapping, nil
107
+
func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
108
+
if ix.indexer != nil {
109
+
return false, errors.New("indexer is already initialized")
112
+
indexer, err := openIndexer(ctx, ix.path)
116
+
if indexer != nil {
117
+
ix.indexer = indexer
121
+
mapping, err := generatePullIndexMapping()
125
+
indexer, err = bleve.New(ix.path, mapping)
130
+
ix.indexer = indexer
135
+
func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
136
+
l := tlog.FromContext(ctx)
137
+
indexer, err := bleve.Open(path)
139
+
if errors.Is(err, upsidedown.IncompatibleVersion) {
140
+
l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
141
+
return nil, os.RemoveAll(path)
145
+
return indexer, nil
148
+
func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
149
+
l := tlog.FromContext(ctx)
151
+
pulls, err := db.GetPulls(e)
155
+
count := len(pulls)
156
+
err = ix.Index(ctx, pulls...)
160
+
l.Info("pulls indexed", "count", count)
164
+
// pullData is the data that is stored and will be indexed
165
+
type pullData struct {
166
+
ID int64 `json:"id"`
167
+
RepoAt string `json:"repo_at"`
168
+
PullID int `json:"pull_id"`
169
+
Title string `json:"title"`
170
+
Body string `json:"body"`
171
+
State string `json:"state"`
173
+
Comments []pullCommentData `json:"comments"`
176
+
func makePullData(pull *models.Pull) *pullData {
178
+
ID: int64(pull.ID),
179
+
RepoAt: pull.RepoAt.String(),
180
+
PullID: pull.PullId,
183
+
State: pull.State.String(),
187
+
// Type returns the document type, for bleve's mapping.Classifier interface.
188
+
func (i *pullData) Type() string {
189
+
return pullIndexerDocType
192
+
type pullCommentData struct {
193
+
Body string `json:"body"`
196
+
type searchResult struct {
201
+
const maxBatchSize = 20
203
+
func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error {
204
+
batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
205
+
for _, pull := range pulls {
206
+
pullData := makePullData(pull)
207
+
if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil {
211
+
return batch.Flush()
214
+
func (ix *Indexer) Delete(ctx context.Context, pullID int64) error {
215
+
return ix.indexer.Delete(base36.Encode(pullID))
218
+
// Search searches for pulls
219
+
func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) {
220
+
var queries []query.Query
222
+
// TODO(boltless): remove this after implementing pulls page pagination
223
+
limit := opts.Page.Limit
228
+
if opts.Keyword != "" {
229
+
queries = append(queries, bleve.NewDisjunctionQuery(
230
+
bleveutil.MatchAndQuery("title", opts.Keyword, pullIndexerAnalyzer, 0),
231
+
bleveutil.MatchAndQuery("body", opts.Keyword, pullIndexerAnalyzer, 0),
234
+
queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
235
+
queries = append(queries, bleveutil.KeywordFieldQuery("state", opts.State.String()))
237
+
var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
238
+
searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false)
239
+
res, err := ix.indexer.SearchInContext(ctx, searchReq)
243
+
ret := &searchResult{
245
+
Hits: make([]int64, len(res.Hits)),
247
+
for i, hit := range res.Hits {
248
+
id, err := base36.Decode(hit.ID)