appview: indexer: add indexer mappings #673

closed
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+88 -4
appview
indexer
issues
+88 -4
appview/indexer/issues/indexer.go
···
+
// heavily inspired by gitea's model (basically copy-pasted)
package issues_indexer
import (
···
"os"
"github.com/blevesearch/bleve/v2"
+
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
+
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
+
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/index/upsidedown"
+
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search/query"
"tangled.org/core/appview/db"
"tangled.org/core/appview/indexer/base36"
···
tlog "tangled.org/core/log"
)
+
const (
	// issueIndexerAnalyzer is the name under which the custom analyzer for
	// issue text is registered in the index mapping.
	issueIndexerAnalyzer = "issueIndexer"
	// issueIndexerDocType is the document type name that issue documents
	// are mapped under.
	issueIndexerDocType = "issueIndexerDocType"

	// unicodeNormalizeName is the name under which the unicode
	// normalization token filter is registered in the index mapping.
	unicodeNormalizeName = "unicodeNormalize"
)
+
type Indexer struct {
indexer bleve.Index
path string
···
l.Info("Initialized the issue indexer")
}
+
func generateIssueIndexMapping() (mapping.IndexMapping, error) {
+
mapping := bleve.NewIndexMapping()
+
docMapping := bleve.NewDocumentMapping()
+
+
textFieldMapping := bleve.NewTextFieldMapping()
+
textFieldMapping.Store = false
+
textFieldMapping.IncludeInAll = false
+
+
boolFieldMapping := bleve.NewBooleanFieldMapping()
+
boolFieldMapping.Store = false
+
boolFieldMapping.IncludeInAll = false
+
+
keywordFieldMapping := bleve.NewKeywordFieldMapping()
+
keywordFieldMapping.Store = false
+
keywordFieldMapping.IncludeInAll = false
+
+
// numericFieldMapping := bleve.NewNumericFieldMapping()
+
+
docMapping.AddFieldMappingsAt("title", textFieldMapping)
+
docMapping.AddFieldMappingsAt("body", textFieldMapping)
+
+
docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
+
docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
+
+
err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
+
"type": unicodenorm.Name,
+
"form": unicodenorm.NFC,
+
})
+
if err != nil {
+
return nil, err
+
}
+
+
err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
+
"type": custom.Name,
+
"char_filters": []string{},
+
"tokenizer": unicode.Name,
+
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
+
})
+
if err != nil {
+
return nil, err
+
}
+
+
mapping.DefaultAnalyzer = issueIndexerAnalyzer
+
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
+
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
+
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
+
+
return mapping, nil
+
}
+
func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
if ix.indexer != nil {
return false, errors.New("indexer is already initialized")
···
return true, nil
}
-
mapping := bleve.NewIndexMapping()
+
mapping, err := generateIssueIndexMapping()
+
if err != nil {
+
return false, err
+
}
indexer, err = bleve.New(ix.path, mapping)
if err != nil {
return false, err
···
for _, issue := range issues {
dataList = append(dataList, &IssueData{
ID: issue.Id,
+
RepoAt: issue.RepoAt.String(),
IssueID: issue.IssueId,
Title: issue.Title,
Body: issue.Body,
···
// IssueData is the data that will be stored and indexed for an issue.
type IssueData struct {
ID int64 `json:"id"`
+
RepoAt string `json:"repo_at"`
IssueID int `json:"issue_id"`
Title string `json:"title"`
Body string `json:"body"`
···
Comments []IssueCommentData `json:"comments"`
}
+
// Type returns the document type, for bleve's mapping.Classifier interface.
+
func (i *IssueData) Type() string {
+
return issueIndexerDocType
+
}
+
+
// IssueCommentData holds the body text of a single issue comment. It is the
// element type of IssueData.Comments and serializes as {"body": ...}.
type IssueCommentData struct {
Body string `json:"body"`
}
···
if opts.Keyword != "" {
queries = append(queries, bleve.NewDisjunctionQuery(
-
matchAndQuery(opts.Keyword, "title"),
-
matchAndQuery(opts.Keyword, "body"),
+
matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0),
+
matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0),
))
}
+
queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at"))
queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open"))
// TODO: append more queries
···
return ret, nil
}
-
func matchAndQuery(keyword, field string) query.Query {
+
// matchAndQuery builds a match query for keyword restricted to field.
//
// The keyword is analyzed with the named analyzer, and each resulting term is
// matched with the given fuzziness (0 requires exact term matches). All
// analyzed terms must be present in the field for a document to match.
func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query {
	q := bleve.NewMatchQuery(keyword)
	q.FieldVal = field
	q.Analyzer = analyzer
	q.Fuzziness = fuzziness
	// A match query combines its terms with OR unless told otherwise;
	// require every term so the query lives up to its name.
	q.Operator = query.MatchQueryOperatorAnd
	return q
}
···
q.FieldVal = field
return q
}
+
+
func keywordFieldQuery(keyword, field string) query.Query {
+
q := bleve.NewTermQuery(keyword)
+
q.FieldVal = field
+
return q
+
}