forked from tangled.org/core
Monorepo for Tangled — https://tangled.org
1// heavily inspired by gitea's model (basically copy-pasted) 2package issues_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 "tangled.org/core/appview/pagination" 24 tlog "tangled.org/core/log" 25) 26 27const ( 28 issueIndexerAnalyzer = "issueIndexer" 29 issueIndexerDocType = "issueIndexerDocType" 30 31 unicodeNormalizeName = "uicodeNormalize" 32) 33 34type Indexer struct { 35 indexer bleve.Index 36 path string 37} 38 39func NewIndexer(indexDir string) *Indexer { 40 return &Indexer{ 41 path: indexDir, 42 } 43} 44 45// Init initializes the indexer 46func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 47 l := tlog.FromContext(ctx) 48 existed, err := ix.intialize(ctx) 49 if err != nil { 50 log.Fatalln("failed to initialize issue indexer", err) 51 } 52 if !existed { 53 l.Debug("Populating the issue indexer") 54 err := PopulateIndexer(ctx, ix, e) 55 if err != nil { 56 log.Fatalln("failed to populate issue indexer", err) 57 } 58 } 59 60 count, _ := ix.indexer.DocCount() 61 l.Info("Initialized the issue indexer", "docCount", count) 62} 63 64func generateIssueIndexMapping() (mapping.IndexMapping, error) { 65 mapping := bleve.NewIndexMapping() 66 docMapping := bleve.NewDocumentMapping() 67 68 textFieldMapping := bleve.NewTextFieldMapping() 69 textFieldMapping.Store = false 70 textFieldMapping.IncludeInAll = false 71 72 boolFieldMapping := bleve.NewBooleanFieldMapping() 73 boolFieldMapping.Store = false 74 boolFieldMapping.IncludeInAll = false 75 76 keywordFieldMapping := bleve.NewKeywordFieldMapping() 77 keywordFieldMapping.Store = false 78 keywordFieldMapping.IncludeInAll = false 79 80 // numericFieldMapping := bleve.NewNumericFieldMapping() 81 82 docMapping.AddFieldMappingsAt("title", textFieldMapping) 83 docMapping.AddFieldMappingsAt("body", textFieldMapping) 84 85 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 86 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 87 88 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 89 "type": unicodenorm.Name, 90 "form": unicodenorm.NFC, 91 }) 92 if err != nil { 93 return nil, err 94 } 95 96 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 97 "type": custom.Name, 98 "char_filters": []string{}, 99 "tokenizer": unicode.Name, 100 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 101 }) 102 if err != nil { 103 return nil, err 104 } 105 106 mapping.DefaultAnalyzer = issueIndexerAnalyzer 107 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 108 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 109 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 110 111 return mapping, nil 112} 113 114func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 115 if ix.indexer != nil { 116 return false, errors.New("indexer is already initialized") 117 } 118 119 indexer, err := openIndexer(ctx, ix.path) 120 if err != nil { 121 return false, err 122 } 123 if indexer != nil { 124 ix.indexer = indexer 125 return true, nil 126 } 127 128 mapping, err := generateIssueIndexMapping() 129 if err != nil { 130 return false, err 131 } 132 indexer, err = bleve.New(ix.path, mapping) 133 if err != nil { 134 return false, err 135 } 136 137 ix.indexer = indexer 138 139 return false, nil 140} 141 142func openIndexer(ctx context.Context, path string) (bleve.Index, error) { 143 l := tlog.FromContext(ctx) 144 indexer, err := bleve.Open(path) 145 if err != nil { 146 if errors.Is(err, upsidedown.IncompatibleVersion) { 147 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 148 return nil, os.RemoveAll(path) 149 } 150 return nil, nil 151 } 152 return indexer, nil 153} 154 155func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 156 l := tlog.FromContext(ctx) 157 count := 0 158 err := pagination.IterateAll( 159 func(page pagination.Page) ([]models.Issue, error) { 160 return db.GetIssuesPaginated(e, page) 161 }, 162 func(issues []models.Issue) error { 163 count += len(issues) 164 return ix.Index(ctx, issues...) 165 }, 166 ) 167 l.Info("issues indexed", "count", count) 168 return err 169} 170 171// issueData data stored and will be indexed 172type issueData struct { 173 ID int64 `json:"id"` 174 RepoAt string `json:"repo_at"` 175 IssueID int `json:"issue_id"` 176 Title string `json:"title"` 177 Body string `json:"body"` 178 179 IsOpen bool `json:"is_open"` 180 Comments []IssueCommentData `json:"comments"` 181} 182 183func makeIssueData(issue *models.Issue) *issueData { 184 return &issueData{ 185 ID: issue.Id, 186 RepoAt: issue.RepoAt.String(), 187 IssueID: issue.IssueId, 188 Title: issue.Title, 189 Body: issue.Body, 190 IsOpen: issue.Open, 191 } 192} 193 194// Type returns the document type, for bleve's mapping.Classifier interface. 195func (i *issueData) Type() string { 196 return issueIndexerDocType 197} 198 199type IssueCommentData struct { 200 Body string `json:"body"` 201} 202 203type SearchResult struct { 204 Hits []int64 205 Total uint64 206} 207 208const maxBatchSize = 20 209 210func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error { 211 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 212 for _, issue := range issues { 213 issueData := makeIssueData(&issue) 214 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil { 215 return err 216 } 217 } 218 return batch.Flush() 219} 220 221func (ix *Indexer) Delete(ctx context.Context, issueId int64) error { 222 return ix.indexer.Delete(base36.Encode(issueId)) 223} 224 225// Search searches for issues 226func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) { 227 var queries []query.Query 228 229 if opts.Keyword != "" { 230 queries = append(queries, bleve.NewDisjunctionQuery( 231 bleveutil.MatchAndQuery("title", opts.Keyword, issueIndexerAnalyzer, 0), 232 bleveutil.MatchAndQuery("body", opts.Keyword, issueIndexerAnalyzer, 0), 233 )) 234 } 235 queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 236 queries = append(queries, bleveutil.BoolFieldQuery("is_open", opts.IsOpen)) 237 // TODO: append more queries 238 239 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) 240 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 241 res, err := ix.indexer.SearchInContext(ctx, searchReq) 242 if err != nil { 243 return nil, nil 244 } 245 ret := &SearchResult{ 246 Total: res.Total, 247 Hits: make([]int64, len(res.Hits)), 248 } 249 for i, hit := range res.Hits { 250 id, err := base36.Decode(hit.ID) 251 if err != nil { 252 return nil, err 253 } 254 ret.Hits[i] = id 255 } 256 return ret, nil 257}