From a1ff26a5e0c75e1ce4083d9a6d4ff292d1c7f939 Mon Sep 17 00:00:00 2001
From: Seongmin Lee
Date: Thu, 14 Aug 2025 01:14:57 +0900
Subject: [PATCH] appview: indexer: add indexer mappings

Change-Id: tmkvwxporzqzvnpkluklsnvpztkoropq
Signed-off-by: Seongmin Lee
---
 appview/indexer/issues/indexer.go | 97 +++++++++++++++++++++++++++++--
 1 file changed, 93 insertions(+), 4 deletions(-)

diff --git a/appview/indexer/issues/indexer.go b/appview/indexer/issues/indexer.go
index ed17cb83..449af030 100644
--- a/appview/indexer/issues/indexer.go
+++ b/appview/indexer/issues/indexer.go
@@ -7,7 +7,13 @@ import (
 	"os"
 
 	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
+	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
 	"github.com/blevesearch/bleve/v2/index/upsidedown"
+	"github.com/blevesearch/bleve/v2/mapping"
 	"github.com/blevesearch/bleve/v2/search/query"
 	"tangled.org/core/appview/db"
 	"tangled.org/core/appview/indexer/base36"
@@ -17,6 +23,14 @@ import (
 	tlog "tangled.org/core/log"
 )
 
+// Names registered in, and looked up from, the issue index mapping.
+const (
+	issueIndexerAnalyzer = "issueIndexer"
+	issueIndexerDocType  = "issueIndexerDocType"
+
+	unicodeNormalizeName = "unicodeNormalize"
+)
+
 type Indexer struct {
 	indexer bleve.Index
 	path    string
@@ -45,6 +59,64 @@ func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
 	l.Info("Initialized the issue indexer")
 }
 
+// generateIssueIndexMapping builds the bleve index mapping for issue documents.
+//
+// Documents classified as issueIndexerDocType get explicit mappings for
+// "title" and "body" (text), "repo_at" (keyword) and "is_open" (boolean);
+// none of these are stored or included in the composite "_all" field. The
+// default analyzer is a custom one: unicode tokenizer followed by NFC
+// normalization, camelCase splitting and lowercasing. The default document
+// mapping is disabled. An error is returned if a custom component is rejected.
+func generateIssueIndexMapping() (mapping.IndexMapping, error) {
+	indexMapping := bleve.NewIndexMapping()
+	docMapping := bleve.NewDocumentMapping()
+
+	textFieldMapping := bleve.NewTextFieldMapping()
+	textFieldMapping.Store = false
+	textFieldMapping.IncludeInAll = false
+
+	boolFieldMapping := bleve.NewBooleanFieldMapping()
+	boolFieldMapping.Store = false
+	boolFieldMapping.IncludeInAll = false
+
+	keywordFieldMapping := bleve.NewKeywordFieldMapping()
+	keywordFieldMapping.Store = false
+	keywordFieldMapping.IncludeInAll = false
+
+	// numericFieldMapping := bleve.NewNumericFieldMapping()
+
+	docMapping.AddFieldMappingsAt("title", textFieldMapping)
+	docMapping.AddFieldMappingsAt("body", textFieldMapping)
+
+	docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
+	docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
+
+	err := indexMapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
+		"type": unicodenorm.Name,
+		"form": unicodenorm.NFC,
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	err = indexMapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
+		"type":          custom.Name,
+		"char_filters":  []string{},
+		"tokenizer":     unicode.Name,
+		"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	indexMapping.DefaultAnalyzer = issueIndexerAnalyzer
+	indexMapping.AddDocumentMapping(issueIndexerDocType, docMapping)
+	indexMapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
+	indexMapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
+
+	return indexMapping, nil
+}
+
 func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
 	if ix.indexer != nil {
 		return false, errors.New("indexer is already initialized")
@@ -59,7 +131,10 @@ func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
 		return true, nil
 	}
 
-	mapping := bleve.NewIndexMapping()
+	mapping, err := generateIssueIndexMapping()
+	if err != nil {
+		return false, err
+	}
 	indexer, err = bleve.New(ix.path, mapping)
 	if err != nil {
 		return false, err
@@ -122,6 +197,14 @@ func makeIssueData(issue *models.Issue) issueData {
 	}
 }
 
+// Type returns the document type, for bleve's mapping.Classifier interface.
+//
+// The receiver is a value so that both issueData and *issueData satisfy the
+// interface; makeIssueData returns a plain issueData.
+func (i issueData) Type() string {
+	return issueIndexerDocType
+}
+
 type IssueCommentData struct {
 	Body string `json:"body"`
 }
@@ -150,8 +233,8 @@ func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (
 
 	if opts.Keyword != "" {
 		queries = append(queries, bleve.NewDisjunctionQuery(
-			matchAndQuery(opts.Keyword, "title"),
-			matchAndQuery(opts.Keyword, "body"),
+			matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0),
+			matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0),
 		))
 	}
 	queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at"))
@@ -178,9 +261,15 @@ func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (
 	return ret, nil
 }
 
-func matchAndQuery(keyword, field string) query.Query {
+// matchAndQuery builds a match query for keyword against field, using the
+// named analyzer and the given fuzziness.
+// NOTE(review): the query operator is not set here; confirm whether
+// query.MatchQueryOperatorAnd was intended, as the function name suggests.
+func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query {
 	q := bleve.NewMatchQuery(keyword)
 	q.FieldVal = field
+	q.Analyzer = analyzer
+	q.Fuzziness = fuzziness
 	return q
 }
 
-- 
2.43.0