forked from tangled.org/core
Monorepo for Tangled — https://tangled.org
1// heavily inspired by gitea's model (basically copy-pasted) 2package pulls_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 tlog "tangled.org/core/log" 24) 25 26const ( 27 pullIndexerAnalyzer = "pullIndexer" 28 pullIndexerDocType = "pullIndexerDocType" 29 30 unicodeNormalizeName = "uicodeNormalize" 31) 32 33type Indexer struct { 34 indexer bleve.Index 35 path string 36} 37 38func NewIndexer(indexDir string) *Indexer { 39 return &Indexer{ 40 path: indexDir, 41 } 42} 43 44// Init initializes the indexer 45func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 46 l := tlog.FromContext(ctx) 47 existed, err := ix.intialize(ctx) 48 if err != nil { 49 log.Fatalln("failed to initialize pull indexer", err) 50 } 51 if !existed { 52 l.Debug("Populating the pull indexer") 53 err := PopulateIndexer(ctx, ix, e) 54 if err != nil { 55 log.Fatalln("failed to populate pull indexer", err) 56 } 57 } 58 l.Info("Initialized the pull indexer") 59} 60 61func generatePullIndexMapping() (mapping.IndexMapping, error) { 62 mapping := bleve.NewIndexMapping() 63 docMapping := bleve.NewDocumentMapping() 64 65 textFieldMapping := bleve.NewTextFieldMapping() 66 textFieldMapping.Store = false 67 textFieldMapping.IncludeInAll = false 68 69 keywordFieldMapping := bleve.NewKeywordFieldMapping() 70 keywordFieldMapping.Store = false 71 keywordFieldMapping.IncludeInAll = false 72 73 // numericFieldMapping := bleve.NewNumericFieldMapping() 74 75 docMapping.AddFieldMappingsAt("title", textFieldMapping) 76 docMapping.AddFieldMappingsAt("body", textFieldMapping) 77 78 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 79 docMapping.AddFieldMappingsAt("state", keywordFieldMapping) 80 81 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 82 "type": unicodenorm.Name, 83 "form": unicodenorm.NFC, 84 }) 85 if err != nil { 86 return nil, err 87 } 88 89 err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{ 90 "type": custom.Name, 91 "char_filters": []string{}, 92 "tokenizer": unicode.Name, 93 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 94 }) 95 if err != nil { 96 return nil, err 97 } 98 99 mapping.DefaultAnalyzer = pullIndexerAnalyzer 100 mapping.AddDocumentMapping(pullIndexerDocType, docMapping) 101 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 102 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 103 104 return mapping, nil 105} 106 107func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 108 if ix.indexer != nil { 109 return false, errors.New("indexer is already initialized") 110 } 111 112 indexer, err := openIndexer(ctx, ix.path) 113 if err != nil { 114 return false, err 115 } 116 if indexer != nil { 117 ix.indexer = indexer 118 return true, nil 119 } 120 121 mapping, err := generatePullIndexMapping() 122 if err != nil { 123 return false, err 124 } 125 indexer, err = bleve.New(ix.path, mapping) 126 if err != nil { 127 return false, err 128 } 129 130 ix.indexer = indexer 131 132 return false, nil 133} 134 135func openIndexer(ctx context.Context, path string) (bleve.Index, error) { 136 l := tlog.FromContext(ctx) 137 indexer, err := bleve.Open(path) 138 if err != nil { 139 if errors.Is(err, upsidedown.IncompatibleVersion) { 140 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 141 return nil, os.RemoveAll(path) 142 } 143 return nil, nil 144 } 145 return indexer, nil 146} 147 148func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 149 l := tlog.FromContext(ctx) 150 151 pulls, err := db.GetPulls(e) 152 if err != nil { 153 return err 154 } 155 count := len(pulls) 156 err = ix.Index(ctx, pulls...) 157 if err != nil { 158 return err 159 } 160 l.Info("pulls indexed", "count", count) 161 return err 162} 163 164// pullData data stored and will be indexed 165type pullData struct { 166 ID int64 `json:"id"` 167 RepoAt string `json:"repo_at"` 168 PullID int `json:"pull_id"` 169 Title string `json:"title"` 170 Body string `json:"body"` 171 State string `json:"state"` 172 173 Comments []pullCommentData `json:"comments"` 174} 175 176func makePullData(pull *models.Pull) *pullData { 177 return &pullData{ 178 ID: int64(pull.ID), 179 RepoAt: pull.RepoAt.String(), 180 PullID: pull.PullId, 181 Title: pull.Title, 182 Body: pull.Body, 183 State: pull.State.String(), 184 } 185} 186 187// Type returns the document type, for bleve's mapping.Classifier interface. 188func (i *pullData) Type() string { 189 return pullIndexerDocType 190} 191 192type pullCommentData struct { 193 Body string `json:"body"` 194} 195 196type searchResult struct { 197 Hits []int64 198 Total uint64 199} 200 201const maxBatchSize = 20 202 203func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error { 204 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 205 for _, pull := range pulls { 206 pullData := makePullData(pull) 207 if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil { 208 return err 209 } 210 } 211 return batch.Flush() 212} 213 214func (ix *Indexer) Delete(ctx context.Context, pullID int64) error { 215 return ix.indexer.Delete(base36.Encode(pullID)) 216} 217 218// Search searches for pulls 219func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) { 220 var queries []query.Query 221 222 // TODO(boltless): remove this after implementing pulls page pagination 223 limit := opts.Page.Limit 224 if limit == 0 { 225 limit = 500 226 } 227 228 if opts.Keyword != "" { 229 queries = append(queries, bleve.NewDisjunctionQuery( 230 bleveutil.MatchAndQuery("title", opts.Keyword, pullIndexerAnalyzer, 0), 231 bleveutil.MatchAndQuery("body", opts.Keyword, pullIndexerAnalyzer, 0), 232 )) 233 } 234 queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 235 queries = append(queries, bleveutil.KeywordFieldQuery("state", opts.State.String())) 236 237 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) 238 searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false) 239 res, err := ix.indexer.SearchInContext(ctx, searchReq) 240 if err != nil { 241 return nil, nil 242 } 243 ret := &searchResult{ 244 Total: res.Total, 245 Hits: make([]int64, len(res.Hits)), 246 } 247 for i, hit := range res.Hits { 248 id, err := base36.Decode(hit.ID) 249 if err != nil { 250 return nil, err 251 } 252 ret.Hits[i] = id 253 } 254 return ret, nil 255}