···

// heavily inspired by gitea's model (basically copy-pasted)

import (
	"context"
	"errors"
	"log"
	"os"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
	"github.com/blevesearch/bleve/v2/index/upsidedown"
	"github.com/blevesearch/bleve/v2/mapping"
	"github.com/blevesearch/bleve/v2/search/query"

	"tangled.org/core/appview/db"
	"tangled.org/core/appview/indexer/base36"
	"tangled.org/core/appview/indexer/bleve"
	"tangled.org/core/appview/models"
	tlog "tangled.org/core/log"
)

const (
	pullIndexerAnalyzer  = "pullIndexer"
	pullIndexerDocType   = "pullIndexerDocType"
	unicodeNormalizeName = "unicodeNormalize"
)
func NewIndexer(indexDir string) *Indexer {
	// ...
}

// Init initializes the indexer.
func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
	l := tlog.FromContext(ctx)
	existed, err := ix.initialize(ctx)
	if err != nil {
		log.Fatalln("failed to initialize pull indexer", err)
	}
	if !existed { // populate only when the index was freshly created
		l.Debug("Populating the pull indexer")
		err := PopulateIndexer(ctx, ix, e)
		if err != nil {
			log.Fatalln("failed to populate pull indexer", err)
		}
	}
	l.Info("Initialized the pull indexer")
}

func generatePullIndexMapping() (mapping.IndexMapping, error) {
	mapping := bleve.NewIndexMapping()
	docMapping := bleve.NewDocumentMapping()

	// full-text fields: analyzed for search, but not stored and not part of _all
	textFieldMapping := bleve.NewTextFieldMapping()
	textFieldMapping.Store = false
	textFieldMapping.IncludeInAll = false

	// exact-match fields: indexed as single keyword terms
	keywordFieldMapping := bleve.NewKeywordFieldMapping()
	keywordFieldMapping.Store = false
	keywordFieldMapping.IncludeInAll = false

	// numericFieldMapping := bleve.NewNumericFieldMapping()

	docMapping.AddFieldMappingsAt("title", textFieldMapping)
	docMapping.AddFieldMappingsAt("body", textFieldMapping)
	docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
	docMapping.AddFieldMappingsAt("state", keywordFieldMapping)

	err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
		"type": unicodenorm.Name,
		"form": unicodenorm.NFC,
	})
	if err != nil {
		return nil, err
	}

	err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{
		"type":          custom.Name,
		"char_filters":  []string{},
		"tokenizer":     unicode.Name,
		"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
	})
	if err != nil {
		return nil, err
	}

	mapping.DefaultAnalyzer = pullIndexerAnalyzer
	mapping.AddDocumentMapping(pullIndexerDocType, docMapping)
	mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
	mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()

	return mapping, nil
}
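
// Rough sketch of what the custom analyzer above does to a pull title,
// assuming the usual behaviour of bleve's unicode tokenizer and the
// camelcase/lowercase filters: "Fix jsonParser panic" tokenizes to
// ["Fix", "jsonParser", "panic"], the camelcase filter splits the middle
// token into ["json", "Parser"], and lowercasing yields
// ["fix", "json", "parser", "panic"], so a keyword query for "parser"
// matches the document. The unicodenorm filter normalizes to NFC first,
// so composed and decomposed accents ("é") index identically.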

func (ix *Indexer) initialize(ctx context.Context) (bool, error) {
	if ix.indexer != nil {
		return false, errors.New("indexer is already initialized")
	}
	// open an existing on-disk index, or build a new one with the pull mapping
	indexer, err := openIndexer(ctx, ix.path)
	// ...
	mapping, err := generatePullIndexMapping()
	// ...
	indexer, err = bleve.New(ix.path, mapping)
	// ...
}

func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
	l := tlog.FromContext(ctx)
	indexer, err := bleve.Open(path)
	if errors.Is(err, upsidedown.IncompatibleVersion) {
		l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
		return nil, os.RemoveAll(path)
	}
	// ...
	return indexer, nil
}

func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
	l := tlog.FromContext(ctx)
	pulls, err := db.GetPulls(e)
	if err != nil {
		return err
	}
	err = ix.Index(ctx, pulls...)
	if err != nil {
		return err
	}
	l.Info("pulls indexed", "count", len(pulls))
	return nil
}

// pullData is the document stored and indexed for each pull
type pullData struct {
	RepoAt   string            `json:"repo_at"`
	PullID   int               `json:"pull_id"`
	Title    string            `json:"title"`
	Body     string            `json:"body"`
	State    string            `json:"state"`
	Comments []pullCommentData `json:"comments"`
	// ...
}

func makePullData(pull *models.Pull) *pullData {
	return &pullData{
		RepoAt: pull.RepoAt.String(),
		State:  pull.State.String(),
		// ...
	}
}

// Type returns the document type, for bleve's mapping.Classifier interface.
func (i *pullData) Type() string {
	return pullIndexerDocType
}

type pullCommentData struct {
	Body string `json:"body"`
}
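
// For illustration only, a single indexed pull roughly serializes to the
// following (field names come from the json tags above; the document key and
// any fields elided from this excerpt are not shown):
//
//	{
//	  "repo_at": "at://did:plc:example/...",
//	  "pull_id": 42,
//	  "title": "Fix jsonParser panic",
//	  "body": "...",
//	  "state": "open",
//	  "comments": [{"body": "lgtm"}]
//	}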

type searchResult struct {
	// ...
}

const maxBatchSize = 20

func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error {
	batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
	for _, pull := range pulls {
		pullData := makePullData(pull)
		if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil {
			return err
		}
	}
	// flush whatever is left in the batch
	return batch.Flush()
}

func (ix *Indexer) Delete(ctx context.Context, pullID int64) error {
	return ix.indexer.Delete(base36.Encode(pullID))
}
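
// bleve document IDs are strings, so the numeric pull ID is base36-encoded on
// the way into the index (Index above) and must be encoded the same way when
// deleting, then decoded again when reading search hits back out (see Search
// below); presumably base36 is just a compact string form for the key.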

// Search searches for pulls matching opts.
func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) {
	var queries []query.Query

	// TODO(boltless): remove this after implementing pulls page pagination
	limit := opts.Page.Limit

	if opts.Keyword != "" {
		// match the keyword against either the title or the body
		queries = append(queries, bleve.NewDisjunctionQuery(
			bleveutil.MatchAndQuery("title", opts.Keyword, pullIndexerAnalyzer, 0),
			bleveutil.MatchAndQuery("body", opts.Keyword, pullIndexerAnalyzer, 0),
		))
	}

	// scope results to the repo and the requested pull state
	queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
	queries = append(queries, bleveutil.KeywordFieldQuery("state", opts.State.String()))

	var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
	searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false)

	res, err := ix.indexer.SearchInContext(ctx, searchReq)
	if err != nil {
		return nil, err
	}

	result := &searchResult{
		Hits: make([]int64, len(res.Hits)),
		// ...
	}
	for i, hit := range res.Hits {
		id, err := base36.Decode(hit.ID)
		// ...
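
// A hedged usage sketch for Search; the option field names are taken from how
// opts is used above, everything else (variable names, the surrounding
// handler) is assumed:
//
//	res, err := ix.Search(ctx, models.PullSearchOptions{
//		RepoAt:  repoAt,
//		State:   state,
//		Keyword: "parser panic",
//		Page:    page, // Page.Limit / Page.Offset drive the bleve request
//	})
//	// on success, res.Hits holds the matching pull IDs decoded from their
//	// base36 document keys.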