first-commit

2025-08-25 15:46:12 +08:00
commit f4d95dfff4
5665 changed files with 705359 additions and 0 deletions

View File

@@ -0,0 +1,322 @@
// Copyright 2018 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package bleve
import (
"context"
"strconv"
"code.gitea.io/gitea/modules/indexer"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search/query"
)
const (
issueIndexerAnalyzer = "issueIndexer"
issueIndexerDocType = "issueIndexerDocType"
issueIndexerLatestVersion = 5
)
const unicodeNormalizeName = "unicodeNormalize"
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
"type": unicodenorm.Name,
"form": unicodenorm.NFC,
})
}
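// maxBatchSize is the number of buffered operations after which a FlushingBatch
// automatically flushes to the index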
const maxBatchSize = 16
// IndexerData represents an update to the issue indexer
type IndexerData internal.IndexerData
// Type returns the document type, for bleve's mapping.Classifier interface.
func (i *IndexerData) Type() string {
return issueIndexerDocType
}
// generateIssueIndexMapping generates the bleve index mapping for issues
func generateIssueIndexMapping() (mapping.IndexMapping, error) {
mapping := bleve.NewIndexMapping()
docMapping := bleve.NewDocumentMapping()
numericFieldMapping := bleve.NewNumericFieldMapping()
numericFieldMapping.Store = false
numericFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Store = false
textFieldMapping.IncludeInAll = false
boolFieldMapping := bleve.NewBooleanFieldMapping()
boolFieldMapping.Store = false
boolFieldMapping.IncludeInAll = false
numberFieldMapping := bleve.NewNumericFieldMapping()
numberFieldMapping.Store = false
numberFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
docMapping.AddFieldMappingsAt("title", textFieldMapping)
docMapping.AddFieldMappingsAt("content", textFieldMapping)
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping)
docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping)
docMapping.AddFieldMappingsAt("is_archived", boolFieldMapping)
docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping)
docMapping.AddFieldMappingsAt("no_label", boolFieldMapping)
docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping)
docMapping.AddFieldMappingsAt("project_id", numberFieldMapping)
docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping)
docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping)
docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping)
docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping)
docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping)
docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping)
docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping)
docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping)
docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping)
docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping)
docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping)
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
return nil, err
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
}); err != nil {
return nil, err
}
mapping.DefaultAnalyzer = issueIndexerAnalyzer
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
return mapping, nil
}
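// exampleCamelCaseSearch is a hypothetical sketch (not part of the original code)
// of what the custom analyzer above buys us: the unicode tokenizer splits words,
// then tokens are NFC-normalized, split on CamelCase, and lowercased, so searching
// for "login" matches an issue titled "FixLoginBug".
func exampleCamelCaseSearch() (uint64, error) {
	m, err := generateIssueIndexMapping()
	if err != nil {
		return 0, err
	}
	idx, err := bleve.NewMemOnly(m) // throwaway in-memory index, for demonstration only
	if err != nil {
		return 0, err
	}
	defer idx.Close()
	if err := idx.Index("1", &IndexerData{ID: 1, RepoID: 1, Title: "FixLoginBug"}); err != nil {
		return 0, err
	}
	q := bleve.NewMatchQuery("login")
	q.SetField("title") // _all is disabled above, so the field must be explicit
	res, err := idx.Search(bleve.NewSearchRequest(q))
	if err != nil {
		return 0, err
	}
	return res.Total, nil // expected: 1
}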
var _ internal.Indexer = &Indexer{}
// Indexer implements Indexer interface
type Indexer struct {
inner *inner_bleve.Indexer
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
}
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
return indexer.SearchModesExactWordsFuzzy()
}
// NewIndexer creates a new bleve local indexer
func NewIndexer(indexDir string) *Indexer {
inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping)
return &Indexer{
Indexer: inner,
inner: inner,
}
}
// Index will save the index data
func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
for _, issue := range issues {
if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
return err
}
}
return batch.Flush()
}
// Delete deletes indexes by ids
func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
for _, id := range ids {
if err := batch.Delete(indexer_internal.Base36(id)); err != nil {
return err
}
}
return batch.Flush()
}
// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
var queries []query.Query
if options.Keyword != "" {
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy {
fuzziness := 0
if searchMode == indexer.SearchModeFuzzy {
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
} else /* exact */ {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0),
}...))
}
}
if len(options.RepoIDs) > 0 || options.AllPublic {
var repoQueries []query.Query
for _, repoID := range options.RepoIDs {
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id"))
}
if options.AllPublic {
repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public"))
}
queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...))
}
if options.IsPull.Has() {
queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull"))
}
if options.IsClosed.Has() {
queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed"))
}
if options.IsArchived.Has() {
queries = append(queries, inner_bleve.BoolFieldQuery(options.IsArchived.Value(), "is_archived"))
}
if options.NoLabelOnly {
queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label"))
} else {
if len(options.IncludedLabelIDs) > 0 {
var includeQueries []query.Query
for _, labelID := range options.IncludedLabelIDs {
includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
}
queries = append(queries, bleve.NewConjunctionQuery(includeQueries...))
} else if len(options.IncludedAnyLabelIDs) > 0 {
var includeQueries []query.Query
for _, labelID := range options.IncludedAnyLabelIDs {
includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
}
queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...))
}
if len(options.ExcludedLabelIDs) > 0 {
var excludeQueries []query.Query
for _, labelID := range options.ExcludedLabelIDs {
q := bleve.NewBooleanQuery()
q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
excludeQueries = append(excludeQueries, q)
}
queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...))
}
}
if len(options.MilestoneIDs) > 0 {
var milestoneQueries []query.Query
for _, milestoneID := range options.MilestoneIDs {
milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id"))
}
queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...))
}
if options.ProjectID.Has() {
queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectID.Value(), "project_id"))
}
if options.ProjectColumnID.Has() {
queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectColumnID.Value(), "project_board_id"))
}
if options.PosterID != "" {
// "(none)" becomes 0, it means no poster
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
queries = append(queries, inner_bleve.NumericEqualityQuery(posterIDInt64, "poster_id"))
}
if options.AssigneeID != "" {
if options.AssigneeID == "(any)" {
queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(optional.Some[int64](1), optional.None[int64](), "assignee_id"))
} else {
// "(none)" becomes 0, it means no assignee
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
queries = append(queries, inner_bleve.NumericEqualityQuery(assigneeIDInt64, "assignee_id"))
}
}
if options.MentionID.Has() {
queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids"))
}
if options.ReviewedID.Has() {
queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids"))
}
if options.ReviewRequestedID.Has() {
queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids"))
}
if options.SubscriberID.Has() {
queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids"))
}
if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(
options.UpdatedAfterUnix,
options.UpdatedBeforeUnix,
"updated_unix"))
}
var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
if len(queries) == 0 {
indexerQuery = bleve.NewMatchAllQuery()
}
skip, limit := indexer_internal.ParsePaginator(options.Paginator)
search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false)
if options.SortBy == "" {
options.SortBy = internal.SortByCreatedAsc
}
search.SortBy([]string{string(options.SortBy), "-_id"})
result, err := b.inner.Indexer.SearchInContext(ctx, search)
if err != nil {
return nil, err
}
ret := &internal.SearchResult{
Total: int64(result.Total),
Hits: make([]internal.Match, 0, len(result.Hits)),
}
for _, hit := range result.Hits {
id, err := indexer_internal.ParseBase36(hit.ID)
if err != nil {
return nil, err
}
ret.Hits = append(ret.Hits, internal.Match{
ID: id,
})
}
return ret, nil
}

View File

@@ -0,0 +1,18 @@
// Copyright 2018 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package bleve
import (
"testing"
"code.gitea.io/gitea/modules/indexer/issues/internal/tests"
)
func TestBleveIndexer(t *testing.T) {
dir := t.TempDir()
indexer := NewIndexer(dir)
defer indexer.Close()
tests.TestIndexer(t, indexer)
}

View File

@@ -0,0 +1,143 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package db
import (
"context"
"strings"
"sync"
"code.gitea.io/gitea/models/db"
issue_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/indexer"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/util"
"xorm.io/builder"
)
var _ internal.Indexer = (*Indexer)(nil)
// Indexer implements the internal.Indexer interface, using the database's LIKE search
type Indexer struct {
indexer_internal.Indexer
}
func (i *Indexer) SupportedSearchModes() []indexer.SearchMode {
return indexer.SearchModesExactWords()
}
var GetIndexer = sync.OnceValue(func() *Indexer {
return &Indexer{Indexer: &inner_db.Indexer{}}
})
// Index is a no-op for the db indexer, which searches the database directly
func (i *Indexer) Index(_ context.Context, _ ...*internal.IndexerData) error {
return nil
}
// Delete is a no-op for the db indexer, which searches the database directly
func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
return nil
}
func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond {
if mode == indexer.SearchModeExact {
return db.BuildCaseInsensitiveLike(colName, keyword)
}
// match words
cond := builder.NewCond()
fields := strings.Fields(keyword)
if len(fields) == 0 {
return builder.Expr("1=1")
}
for _, field := range fields {
if field == "" {
continue
}
cond = cond.And(db.BuildCaseInsensitiveLike(colName, field))
}
return cond
}
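// For illustration (hypothetical calls), assuming db.BuildCaseInsensitiveLike
// builds a case-insensitive `LIKE '%keyword%'` condition:
//   buildMatchQuery(indexer.SearchModeExact, "issue.name", "foo bar") -> name LIKE '%foo bar%'
//   buildMatchQuery(indexer.SearchModeWords, "issue.name", "foo bar") -> name LIKE '%foo%' AND name LIKE '%bar%'
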
// Search searches for issues
func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
// FIXME: I tried to avoid importing models here, but it seems to be impossible.
// We could provide a function to register the search function, so that models/issues could register it.
// Then models/issues would import modules/indexer/issues, which is fine because it's by design.
// But modules/indexer/issues has already imported models/issues to do UpdateRepoIndexer and UpdateIssueIndexer.
// To avoid a circular import, we would have to move those functions to another package;
// services/indexer seems to be the right place.
// But the two functions are also used in modules/notification/indexer, which would then need to import services/indexer.
// So that's the root problem:
// the notification code is defined in modules, but it uses lots of things that should be in services.
cond := builder.NewCond()
if options.Keyword != "" {
repoCond := builder.In("repo_id", options.RepoIDs)
if len(options.RepoIDs) == 1 {
repoCond = builder.Eq{"repo_id": options.RepoIDs[0]}
}
subQuery := builder.Select("id").From("issue").Where(repoCond)
searchMode := util.IfZero(options.SearchMode, i.SupportedSearchModes()[0].ModeValue)
cond = builder.Or(
buildMatchQuery(searchMode, "issue.name", options.Keyword),
buildMatchQuery(searchMode, "issue.content", options.Keyword),
builder.In("issue.id", builder.Select("issue_id").
From("comment").
Where(builder.And(
builder.Eq{"type": issue_model.CommentTypeComment},
builder.In("issue_id", subQuery),
buildMatchQuery(searchMode, "content", options.Keyword),
)),
),
)
if options.IsKeywordNumeric() {
cond = cond.Or(
builder.Eq{"`index`": options.Keyword},
)
}
}
opt, err := ToDBOptions(ctx, options)
if err != nil {
return nil, err
}
// If PageSize == 0, return the total count only; it's the special case used by search-count queries.
if options.Paginator != nil && options.Paginator.PageSize == 0 {
total, err := issue_model.CountIssues(ctx, opt, cond)
if err != nil {
return nil, err
}
return &internal.SearchResult{
Total: total,
}, nil
}
return i.FindWithIssueOptions(ctx, opt, cond)
}
func (i *Indexer) FindWithIssueOptions(ctx context.Context, opt *issue_model.IssuesOptions, otherConds ...builder.Cond) (*internal.SearchResult, error) {
ids, total, err := issue_model.IssueIDs(ctx, opt, otherConds...)
if err != nil {
return nil, err
}
hits := make([]internal.Match, 0, len(ids))
for _, id := range ids {
hits = append(hits, internal.Match{
ID: id,
})
}
return &internal.SearchResult{
Total: total,
Hits: hits,
}, nil
}

View File

@@ -0,0 +1,116 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package db
import (
"context"
"fmt"
"strings"
"code.gitea.io/gitea/models/db"
issue_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/optional"
)
func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_model.IssuesOptions, error) {
var sortType string
switch options.SortBy {
case internal.SortByCreatedAsc:
sortType = "oldest"
case internal.SortByUpdatedAsc:
sortType = "leastupdate"
case internal.SortByCommentsAsc:
sortType = "leastcomment"
case internal.SortByDeadlineDesc:
sortType = "farduedate"
case internal.SortByCreatedDesc:
sortType = "newest"
case internal.SortByUpdatedDesc:
sortType = "recentupdate"
case internal.SortByCommentsDesc:
sortType = "mostcomment"
case internal.SortByDeadlineAsc:
sortType = "nearduedate"
default:
if strings.HasPrefix(string(options.SortBy), issue_model.ScopeSortPrefix) {
sortType = string(options.SortBy)
} else {
sortType = "newest"
}
}
// See the comment of issues_model.SearchOptions for the reason why we need to convert
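// An informal summary of the mapping implemented below:
//   None    -> 0                 (the field is not filtered at all)
//   Some(0) -> db.NoConditionID  (filter for "no value", e.g. no milestone)
//   Some(v) -> v                 (filter for the concrete ID)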
convertID := func(id optional.Option[int64]) int64 {
if !id.Has() {
return 0
}
value := id.Value()
if value == 0 {
return db.NoConditionID
}
return value
}
opts := &issue_model.IssuesOptions{
Paginator: options.Paginator,
RepoIDs: options.RepoIDs,
AllPublic: options.AllPublic,
RepoCond: nil,
AssigneeID: options.AssigneeID,
PosterID: options.PosterID,
MentionedID: convertID(options.MentionID),
ReviewRequestedID: convertID(options.ReviewRequestedID),
ReviewedID: convertID(options.ReviewedID),
SubscriberID: convertID(options.SubscriberID),
ProjectID: convertID(options.ProjectID),
ProjectColumnID: convertID(options.ProjectColumnID),
IsClosed: options.IsClosed,
IsPull: options.IsPull,
IncludedLabelNames: nil,
ExcludedLabelNames: nil,
IncludeMilestones: nil,
SortType: sortType,
UpdatedAfterUnix: options.UpdatedAfterUnix.Value(),
UpdatedBeforeUnix: options.UpdatedBeforeUnix.Value(),
PriorityRepoID: 0,
IsArchived: options.IsArchived,
Owner: nil,
Team: nil,
Doer: nil,
}
if len(options.MilestoneIDs) == 1 && options.MilestoneIDs[0] == 0 {
opts.MilestoneIDs = []int64{db.NoConditionID}
} else {
opts.MilestoneIDs = options.MilestoneIDs
}
if options.NoLabelOnly {
opts.LabelIDs = []int64{0} // Be careful, it's zero, not db.NoConditionID
} else {
opts.LabelIDs = make([]int64, 0, len(options.IncludedLabelIDs)+len(options.ExcludedLabelIDs))
opts.LabelIDs = append(opts.LabelIDs, options.IncludedLabelIDs...)
for _, id := range options.ExcludedLabelIDs {
opts.LabelIDs = append(opts.LabelIDs, -id)
}
if len(options.IncludedLabelIDs) == 0 && len(options.IncludedAnyLabelIDs) > 0 {
labels, err := issue_model.GetLabelsByIDs(ctx, options.IncludedAnyLabelIDs, "name")
if err != nil {
return nil, fmt.Errorf("GetLabelsByIDs: %v", err)
}
set := container.Set[string]{}
for _, label := range labels {
if !set.Contains(label.Name) {
set.Add(label.Name)
opts.IncludedLabelNames = append(opts.IncludedLabelNames, label.Name)
}
}
}
}
return opts, nil
}

View File

@@ -0,0 +1,113 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package issues
import (
"strings"
"code.gitea.io/gitea/models/db"
issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/setting"
)
func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOptions {
if opts.IssueIDs != nil {
setting.PanicInDevOrTesting("Indexer SearchOptions doesn't support IssueIDs")
}
searchOpt := &SearchOptions{
Keyword: keyword,
RepoIDs: opts.RepoIDs,
AllPublic: opts.AllPublic,
IsPull: opts.IsPull,
IsClosed: opts.IsClosed,
IsArchived: opts.IsArchived,
}
if len(opts.LabelIDs) == 1 && opts.LabelIDs[0] == 0 {
searchOpt.NoLabelOnly = true
} else {
for _, labelID := range opts.LabelIDs {
if labelID > 0 {
searchOpt.IncludedLabelIDs = append(searchOpt.IncludedLabelIDs, labelID)
} else {
searchOpt.ExcludedLabelIDs = append(searchOpt.ExcludedLabelIDs, -labelID)
}
}
// opts.IncludedLabelNames and opts.ExcludedLabelNames are not supported here.
// It's not a TO DO, it's just unnecessary.
}
if len(opts.MilestoneIDs) == 1 && opts.MilestoneIDs[0] == db.NoConditionID {
searchOpt.MilestoneIDs = []int64{0}
} else {
searchOpt.MilestoneIDs = opts.MilestoneIDs
}
if opts.ProjectID > 0 {
searchOpt.ProjectID = optional.Some(opts.ProjectID)
} else if opts.ProjectID == db.NoConditionID { // FIXME: this is inconsistent with other places
searchOpt.ProjectID = optional.Some[int64](0) // those issues with no project (project_id == 0)
}
searchOpt.AssigneeID = opts.AssigneeID
// See the comment of issues_model.SearchOptions for the reason why we need to convert
convertID := func(id int64) optional.Option[int64] {
if id > 0 {
return optional.Some(id)
}
if id == db.NoConditionID {
return optional.None[int64]()
}
return nil
}
searchOpt.ProjectColumnID = convertID(opts.ProjectColumnID)
searchOpt.PosterID = opts.PosterID
searchOpt.MentionID = convertID(opts.MentionedID)
searchOpt.ReviewedID = convertID(opts.ReviewedID)
searchOpt.ReviewRequestedID = convertID(opts.ReviewRequestedID)
searchOpt.SubscriberID = convertID(opts.SubscriberID)
if opts.UpdatedAfterUnix > 0 {
searchOpt.UpdatedAfterUnix = optional.Some(opts.UpdatedAfterUnix)
}
if opts.UpdatedBeforeUnix > 0 {
searchOpt.UpdatedBeforeUnix = optional.Some(opts.UpdatedBeforeUnix)
}
searchOpt.Paginator = opts.Paginator
switch opts.SortType {
case "", "latest":
searchOpt.SortBy = SortByCreatedDesc
case "oldest":
searchOpt.SortBy = SortByCreatedAsc
case "recentupdate":
searchOpt.SortBy = SortByUpdatedDesc
case "leastupdate":
searchOpt.SortBy = SortByUpdatedAsc
case "mostcomment":
searchOpt.SortBy = SortByCommentsDesc
case "leastcomment":
searchOpt.SortBy = SortByCommentsAsc
case "nearduedate":
searchOpt.SortBy = SortByDeadlineAsc
case "farduedate":
searchOpt.SortBy = SortByDeadlineDesc
case "priority", "priorityrepo", "project-column-sorting":
// Unsupported sort type for search
fallthrough
default:
if strings.HasPrefix(opts.SortType, issues_model.ScopeSortPrefix) {
searchOpt.SortBy = internal.SortBy(opts.SortType)
} else {
searchOpt.SortBy = SortByUpdatedDesc
}
}
return searchOpt
}

View File

@@ -0,0 +1,310 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package elasticsearch
import (
"context"
"strconv"
"strings"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/indexer"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/util"
"github.com/olivere/elastic/v7"
)
const (
issueIndexerLatestVersion = 2
// multi-match-types, currently only 2 types are used
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
esMultiMatchTypeBestFields = "best_fields"
esMultiMatchTypePhrasePrefix = "phrase_prefix"
)
var _ internal.Indexer = &Indexer{}
// Indexer implements Indexer interface
type Indexer struct {
inner *inner_elasticsearch.Indexer
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
}
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
// TODO: ES supports fuzzy search, but our code doesn't use it at the moment; besides, the default fuzziness there is already "AUTO"
return indexer.SearchModesExactWords()
}
// NewIndexer creates a new elasticsearch indexer
func NewIndexer(url, indexerName string) *Indexer {
inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
indexer := &Indexer{
inner: inner,
Indexer: inner,
}
return indexer
}
const (
defaultMapping = `
{
"mappings": {
"properties": {
"id": { "type": "integer", "index": true },
"repo_id": { "type": "integer", "index": true },
"is_public": { "type": "boolean", "index": true },
"title": { "type": "text", "index": true },
"content": { "type": "text", "index": true },
"comments": { "type" : "text", "index": true },
"is_pull": { "type": "boolean", "index": true },
"is_closed": { "type": "boolean", "index": true },
"is_archived": { "type": "boolean", "index": true },
"label_ids": { "type": "integer", "index": true },
"no_label": { "type": "boolean", "index": true },
"milestone_id": { "type": "integer", "index": true },
"project_id": { "type": "integer", "index": true },
"project_board_id": { "type": "integer", "index": true },
"poster_id": { "type": "integer", "index": true },
"assignee_id": { "type": "integer", "index": true },
"mention_ids": { "type": "integer", "index": true },
"reviewed_ids": { "type": "integer", "index": true },
"review_requested_ids": { "type": "integer", "index": true },
"subscriber_ids": { "type": "integer", "index": true },
"updated_unix": { "type": "integer", "index": true },
"created_unix": { "type": "integer", "index": true },
"deadline_unix": { "type": "integer", "index": true },
"comment_count": { "type": "integer", "index": true }
}
}
}
`
)
// Index will save the index data
func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
if len(issues) == 0 {
return nil
} else if len(issues) == 1 {
issue := issues[0]
_, err := b.inner.Client.Index().
Index(b.inner.VersionedIndexName()).
Id(strconv.FormatInt(issue.ID, 10)).
BodyJson(issue).
Do(ctx)
return err
}
reqs := make([]elastic.BulkableRequest, 0)
for _, issue := range issues {
reqs = append(reqs,
elastic.NewBulkIndexRequest().
Index(b.inner.VersionedIndexName()).
Id(strconv.FormatInt(issue.ID, 10)).
Doc(issue),
)
}
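// Note: the bulk request uses the graceful "hammer" context instead of the caller's ctx,
// presumably so that in-flight bulk writes can still complete during shutdown.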
_, err := b.inner.Client.Bulk().
Index(b.inner.VersionedIndexName()).
Add(reqs...).
Do(graceful.GetManager().HammerContext())
return err
}
// Delete deletes indexes by ids
func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
if len(ids) == 0 {
return nil
} else if len(ids) == 1 {
_, err := b.inner.Client.Delete().
Index(b.inner.VersionedIndexName()).
Id(strconv.FormatInt(ids[0], 10)).
Do(ctx)
return err
}
reqs := make([]elastic.BulkableRequest, 0)
for _, id := range ids {
reqs = append(reqs,
elastic.NewBulkDeleteRequest().
Index(b.inner.VersionedIndexName()).
Id(strconv.FormatInt(id, 10)),
)
}
_, err := b.inner.Client.Bulk().
Index(b.inner.VersionedIndexName()).
Add(reqs...).
Do(graceful.GetManager().HammerContext())
return err
}
// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
query := elastic.NewBoolQuery()
if options.Keyword != "" {
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
if searchMode == indexer.SearchModeExact {
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
} else /* words */ {
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))
}
}
if len(options.RepoIDs) > 0 {
q := elastic.NewBoolQuery()
q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
if options.AllPublic {
q.Should(elastic.NewTermQuery("is_public", true))
}
query.Must(q)
}
if options.IsPull.Has() {
query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value()))
}
if options.IsClosed.Has() {
query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value()))
}
if options.IsArchived.Has() {
query.Must(elastic.NewTermQuery("is_archived", options.IsArchived.Value()))
}
if options.NoLabelOnly {
query.Must(elastic.NewTermQuery("no_label", true))
} else {
if len(options.IncludedLabelIDs) > 0 {
q := elastic.NewBoolQuery()
for _, labelID := range options.IncludedLabelIDs {
q.Must(elastic.NewTermQuery("label_ids", labelID))
}
query.Must(q)
} else if len(options.IncludedAnyLabelIDs) > 0 {
query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
}
if len(options.ExcludedLabelIDs) > 0 {
q := elastic.NewBoolQuery()
for _, labelID := range options.ExcludedLabelIDs {
q.MustNot(elastic.NewTermQuery("label_ids", labelID))
}
query.Must(q)
}
}
if len(options.MilestoneIDs) > 0 {
query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
}
if options.ProjectID.Has() {
query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value()))
}
if options.ProjectColumnID.Has() {
query.Must(elastic.NewTermQuery("project_board_id", options.ProjectColumnID.Value()))
}
if options.PosterID != "" {
// "(none)" becomes 0, it means no poster
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
query.Must(elastic.NewTermQuery("poster_id", posterIDInt64))
}
if options.AssigneeID != "" {
if options.AssigneeID == "(any)" {
q := elastic.NewRangeQuery("assignee_id")
q.Gte(1)
query.Must(q)
} else {
// "(none)" becomes 0, it means no assignee
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
query.Must(elastic.NewTermQuery("assignee_id", assigneeIDInt64))
}
}
if options.MentionID.Has() {
query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value()))
}
if options.ReviewedID.Has() {
query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value()))
}
if options.ReviewRequestedID.Has() {
query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
}
if options.SubscriberID.Has() {
query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value()))
}
if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
q := elastic.NewRangeQuery("updated_unix")
if options.UpdatedAfterUnix.Has() {
q.Gte(options.UpdatedAfterUnix.Value())
}
if options.UpdatedBeforeUnix.Has() {
q.Lte(options.UpdatedBeforeUnix.Value())
}
query.Must(q)
}
if options.SortBy == "" {
options.SortBy = internal.SortByCreatedAsc
}
sortBy := []elastic.Sorter{
parseSortBy(options.SortBy),
elastic.NewFieldSort("id").Desc(),
}
// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
// TODO: make it configurable since it's configurable in elasticsearch
const maxPageSize = 10000
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
searchResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()).
Query(query).
SortBy(sortBy...).
From(skip).Size(limit).
Do(ctx)
if err != nil {
return nil, err
}
hits := make([]internal.Match, 0, limit)
for _, hit := range searchResult.Hits.Hits {
id, _ := strconv.ParseInt(hit.Id, 10, 64)
hits = append(hits, internal.Match{
ID: id,
})
}
return &internal.SearchResult{
Total: searchResult.TotalHits(),
Hits: hits,
}, nil
}
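// toAnySlice converts a typed slice to []any, as required by elastic's variadic terms-query helpers.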
func toAnySlice[T any](s []T) []any {
ret := make([]any, 0, len(s))
for _, item := range s {
ret = append(ret, item)
}
return ret
}
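// parseSortBy converts an internal.SortBy like "-updated_unix" into an elastic.Sorter;
// a leading '-' means descending order.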
func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
field := strings.TrimPrefix(string(sortBy), "-")
ret := elastic.NewFieldSort(field)
if strings.HasPrefix(string(sortBy), "-") {
ret.Desc()
}
return ret
}

View File

@@ -0,0 +1,40 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package elasticsearch
import (
"fmt"
"net/http"
"os"
"testing"
"time"
"code.gitea.io/gitea/modules/indexer/issues/internal/tests"
"github.com/stretchr/testify/require"
)
func TestElasticsearchIndexer(t *testing.T) {
// The elasticsearch instance started by pull-db-tests.yml > test-unit > services > elasticsearch
url := "http://elastic:changeme@elasticsearch:9200"
if os.Getenv("CI") == "" {
// Make it possible to run tests against a local elasticsearch instance
url = os.Getenv("TEST_ELASTICSEARCH_URL")
if url == "" {
t.Skip("TEST_ELASTICSEARCH_URL not set and not running in CI")
return
}
}
require.Eventually(t, func() bool {
resp, err := http.Get(url)
return err == nil && resp.StatusCode == http.StatusOK
}, time.Minute, time.Second, "Expected elasticsearch to be up")
indexer := NewIndexer(url, fmt.Sprintf("test_elasticsearch_indexer_%d", time.Now().Unix()))
defer indexer.Close()
tests.TestIndexer(t, indexer)
}

View File

@@ -0,0 +1,326 @@
// Copyright 2018 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package issues
import (
"context"
"fmt"
"os"
"runtime/pprof"
"sync/atomic"
"time"
db_model "code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/issues/bleve"
"code.gitea.io/gitea/modules/indexer/issues/db"
"code.gitea.io/gitea/modules/indexer/issues/elasticsearch"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/indexer/issues/meilisearch"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
)
// IndexerMetadata is used to send data to the queue, so it contains only the IDs.
// It may look weird, because it has to be compatible with the old queue data format.
// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database.
// If the IsDelete flag is false, the ID specifies the issue to index, so the indexer will query the database to get the issue data.
// Note that if the ID does not exist in the database, its index will be deleted too, even if IsDelete is false.
// Valid values:
// - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored
// - IsDelete = false, ID = 1, and IDs will be ignored
type IndexerMetadata struct {
ID int64 `json:"id"`
IsDelete bool `json:"is_delete"`
IDs []int64 `json:"ids"`
}
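// Hypothetical queue payloads illustrating the two valid shapes described above:
//   &IndexerMetadata{ID: 1}                                 // index issue 1 (or remove it from the index if it no longer exists)
//   &IndexerMetadata{IsDelete: true, IDs: []int64{1, 2, 3}} // delete issues 1, 2 and 3 from the index
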
var (
// issueIndexerQueue is the queue of issue IDs to be updated
issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata]
// globalIndexer is the global indexer; it is never nil.
// When the real indexer is not ready, it holds a dummy indexer whose methods return an error explaining that it is not ready.
// So it is always safe to use it as *globalIndexer.Load() and call its methods.
globalIndexer atomic.Pointer[internal.Indexer]
dummyIndexer *internal.Indexer
)
func init() {
i := internal.NewDummyIndexer()
dummyIndexer = &i
globalIndexer.Store(dummyIndexer)
}
// InitIssueIndexer initializes the issue indexer.
// If syncReindex is true, it blocks until reindexing of all issues is done.
func InitIssueIndexer(syncReindex bool) {
ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false)
indexerInitWaitChannel := make(chan time.Duration, 1)
// Create the Queue
issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx))
graceful.GetManager().RunAtTerminate(finished)
// Create the Indexer
go func() {
pprof.SetGoroutineLabels(ctx)
start := time.Now()
log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType)
var (
issueIndexer internal.Indexer
existed bool
err error
)
switch setting.Indexer.IssueType {
case "bleve":
defer func() {
if err := recover(); err != nil {
log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2))
log.Error("The indexer files are likely corrupted and may need to be deleted")
log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath)
globalIndexer.Store(dummyIndexer)
log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err)
}
}()
issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath)
existed, err = issueIndexer.Init(ctx)
if err != nil {
log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err)
}
case "elasticsearch":
issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName)
existed, err = issueIndexer.Init(ctx)
if err != nil {
log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
}
case "db":
issueIndexer = db.GetIndexer()
case "meilisearch":
issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName)
existed, err = issueIndexer.Init(ctx)
if err != nil {
log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
}
default:
log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType)
}
globalIndexer.Store(&issueIndexer)
graceful.GetManager().RunAtTerminate(func() {
log.Debug("Closing issue indexer")
(*globalIndexer.Load()).Close()
log.Info("PID: %d Issue Indexer closed", os.Getpid())
})
// Start processing the queue
go graceful.GetManager().RunWithCancel(issueIndexerQueue)
// Populate the index
if !existed {
if syncReindex {
graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
} else {
go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
}
}
indexerInitWaitChannel <- time.Since(start)
close(indexerInitWaitChannel)
}()
if syncReindex {
select {
case <-indexerInitWaitChannel:
case <-graceful.GetManager().IsShutdown():
}
} else if setting.Indexer.StartupTimeout > 0 {
go func() {
pprof.SetGoroutineLabels(ctx)
timeout := setting.Indexer.StartupTimeout
if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
timeout += setting.GracefulHammerTime
}
select {
case duration := <-indexerInitWaitChannel:
log.Info("Issue Indexer Initialization took %v", duration)
case <-graceful.GetManager().IsShutdown():
log.Warn("Shutdown occurred before issue index initialisation was complete")
case <-time.After(timeout):
issueIndexerQueue.ShutdownWait(5 * time.Second)
log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout)
}
}()
}
}
func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata {
return func(items ...*IndexerMetadata) []*IndexerMetadata {
var unhandled []*IndexerMetadata
indexer := *globalIndexer.Load()
for _, item := range items {
log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete)
if item.IsDelete {
if err := indexer.Delete(ctx, item.IDs...); err != nil {
log.Error("Issue indexer handler: failed to from index: %v Error: %v", item.IDs, err)
unhandled = append(unhandled, item)
}
continue
}
data, existed, err := getIssueIndexerData(ctx, item.ID)
if err != nil {
log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err)
unhandled = append(unhandled, item)
continue
}
if !existed {
if err := indexer.Delete(ctx, item.ID); err != nil {
log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err)
unhandled = append(unhandled, item)
}
continue
}
if err := indexer.Index(ctx, data); err != nil {
log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err)
unhandled = append(unhandled, item)
continue
}
}
return unhandled
}
}
// populateIssueIndexer populates the issue indexer with issue data
func populateIssueIndexer(ctx context.Context) {
ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
defer finished()
ctx = contextWithKeepRetry(ctx) // keep retrying since it's a background task
if err := PopulateIssueIndexer(ctx); err != nil {
log.Error("Issue indexer population failed: %v", err)
}
}
func PopulateIssueIndexer(ctx context.Context) error {
for page := 1; ; page++ {
select {
case <-ctx.Done():
return fmt.Errorf("shutdown before completion: %w", ctx.Err())
default:
}
repos, _, err := repo_model.SearchRepositoryByName(ctx, repo_model.SearchRepoOptions{
ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize},
OrderBy: db_model.SearchOrderByID,
Private: true,
Collaborate: optional.Some(false),
})
if err != nil {
log.Error("SearchRepositoryByName: %v", err)
continue
}
if len(repos) == 0 {
log.Debug("Issue Indexer population complete")
return nil
}
for _, repo := range repos {
if err := updateRepoIndexer(ctx, repo.ID); err != nil {
return fmt.Errorf("populate issue indexer for repo %d: %v", repo.ID, err)
}
}
}
}
// UpdateRepoIndexer adds/updates all issues of the repository
func UpdateRepoIndexer(ctx context.Context, repoID int64) {
if err := updateRepoIndexer(ctx, repoID); err != nil {
log.Error("Unable to push repo %d to issue indexer: %v", repoID, err)
}
}
// UpdateIssueIndexer adds/updates an issue in the issue indexer
func UpdateIssueIndexer(ctx context.Context, issueID int64) {
if err := updateIssueIndexer(ctx, issueID); err != nil {
log.Error("Unable to push issue %d to issue indexer: %v", issueID, err)
}
}
// DeleteRepoIssueIndexer deletes all issue indexes of a repository
func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) {
if err := deleteRepoIssueIndexer(ctx, repoID); err != nil {
log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err)
}
}
// IsAvailable checks if the issue indexer is available
func IsAvailable(ctx context.Context) bool {
return (*globalIndexer.Load()).Ping(ctx) == nil
}
// SearchOptions indicates the options for searching issues
type SearchOptions = internal.SearchOptions
const (
SortByCreatedDesc = internal.SortByCreatedDesc
SortByUpdatedDesc = internal.SortByUpdatedDesc
SortByCommentsDesc = internal.SortByCommentsDesc
SortByDeadlineDesc = internal.SortByDeadlineDesc
SortByCreatedAsc = internal.SortByCreatedAsc
SortByUpdatedAsc = internal.SortByUpdatedAsc
SortByCommentsAsc = internal.SortByCommentsAsc
SortByDeadlineAsc = internal.SortByDeadlineAsc
)
// SearchIssues search issues by options.
func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
ix := *globalIndexer.Load()
if opts.Keyword == "" || opts.IsKeywordNumeric() {
// This is a conservative shortcut.
// If the keyword is empty or an integer, the db has better (or at least not worse) performance for filtering issues.
// When the keyword is empty, the request tends to be a listing rather than a search.
// So if a user creates an issue and lists issues immediately, the issue might not appear yet because the indexer needs time to index it.
// Even worse, an external indexer like Elasticsearch may be unavailable for a while,
// in which case the user would not be able to list issues completely until it becomes available again.
ix = db.GetIndexer()
}
result, err := ix.Search(ctx, opts)
if err != nil {
return nil, 0, err
}
return SearchResultToIDSlice(result), result.Total, nil
}
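// Example usage (hypothetical): search open issues in repo 1 matching "crash",
// returning the first page of 20 results.
//   ids, total, err := SearchIssues(ctx, &SearchOptions{
//       Keyword:   "crash",
//       RepoIDs:   []int64{1},
//       IsClosed:  optional.Some(false),
//       Paginator: &db_model.ListOptions{Page: 1, PageSize: 20},
//   })
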
func SearchResultToIDSlice(result *internal.SearchResult) []int64 {
ret := make([]int64, 0, len(result.Hits))
for _, hit := range result.Hits {
ret = append(ret, hit.ID)
}
return ret
}
// CountIssues counts issues by options. It is a shortcut for SearchIssues(ctx, opts) that only returns the total count.
func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) {
opts = opts.Copy(func(options *SearchOptions) { options.Paginator = &db_model.ListOptions{PageSize: 0} })
_, total, err := SearchIssues(ctx, opts)
return total, err
}
func SupportedSearchModes() []indexer.SearchMode {
gi := globalIndexer.Load()
if gi == nil {
return nil
}
return (*gi).SupportedSearchModes()
}

View File

@@ -0,0 +1,487 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package issues
import (
"testing"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/setting"
_ "code.gitea.io/gitea/models"
_ "code.gitea.io/gitea/models/actions"
_ "code.gitea.io/gitea/models/activities"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) {
unittest.MainTest(m)
}
func TestDBSearchIssues(t *testing.T) {
require.NoError(t, unittest.PrepareTestDatabase())
setting.Indexer.IssueType = "db"
InitIssueIndexer(true)
t.Run("search issues with keyword", searchIssueWithKeyword)
t.Run("search issues by index", searchIssueByIndex)
t.Run("search issues in repo", searchIssueInRepo)
t.Run("search issues by ID", searchIssueByID)
t.Run("search issues is pr", searchIssueIsPull)
t.Run("search issues is closed", searchIssueIsClosed)
t.Run("search issues is archived", searchIssueIsArchived)
t.Run("search issues by milestone", searchIssueByMilestoneID)
t.Run("search issues by label", searchIssueByLabelID)
t.Run("search issues by time", searchIssueByTime)
t.Run("search issues with order", searchIssueWithOrder)
t.Run("search issues in project", searchIssueInProject)
t.Run("search issues with paginator", searchIssueWithPaginator)
t.Run("search issues with any assignee", searchIssueWithAnyAssignee)
}
func searchIssueWithKeyword(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
Keyword: "issue2",
RepoIDs: []int64{1},
},
[]int64{2},
},
{
SearchOptions{
Keyword: "first",
RepoIDs: []int64{1},
},
[]int64{1},
},
{
SearchOptions{
Keyword: "for",
RepoIDs: []int64{1},
},
[]int64{11, 5, 3, 2, 1},
},
{
SearchOptions{
Keyword: "good",
RepoIDs: []int64{1},
},
[]int64{1},
},
}
for _, test := range tests {
t.Run(test.opts.Keyword, func(t *testing.T) {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
})
}
}
func searchIssueByIndex(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
Keyword: "1000",
RepoIDs: []int64{1},
},
[]int64{},
},
{
SearchOptions{
Keyword: "2",
RepoIDs: []int64{1, 2, 3, 32},
},
[]int64{17, 12, 7, 2},
},
{
SearchOptions{
Keyword: "1",
RepoIDs: []int64{58},
},
[]int64{19},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueInRepo(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
RepoIDs: []int64{1},
},
[]int64{11, 5, 3, 2, 1},
},
{
SearchOptions{
RepoIDs: []int64{2},
},
[]int64{7, 4},
},
{
SearchOptions{
RepoIDs: []int64{3},
},
[]int64{12, 6},
},
{
SearchOptions{
RepoIDs: []int64{4},
},
[]int64{},
},
{
SearchOptions{
RepoIDs: []int64{5},
},
[]int64{15},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueByID(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
opts: SearchOptions{
PosterID: "1",
},
expectedIDs: []int64{11, 6, 3, 2, 1},
},
{
opts: SearchOptions{
AssigneeID: "1",
},
expectedIDs: []int64{6, 1},
},
{
// NOTE: This tests the no-assignee filter, and also ToSearchOptions(), to ensure the filter is handled correctly
opts: *ToSearchOptions("", &issues.IssuesOptions{AssigneeID: "(none)"}),
expectedIDs: []int64{22, 21, 16, 15, 14, 13, 12, 11, 20, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2},
},
{
opts: SearchOptions{
MentionID: optional.Some(int64(4)),
},
expectedIDs: []int64{1},
},
{
opts: SearchOptions{
ReviewedID: optional.Some(int64(1)),
},
expectedIDs: []int64{},
},
{
opts: SearchOptions{
ReviewRequestedID: optional.Some(int64(1)),
},
expectedIDs: []int64{12},
},
{
opts: SearchOptions{
SubscriberID: optional.Some(int64(1)),
},
expectedIDs: []int64{11, 6, 5, 3, 2, 1},
},
{
// issue 20 requests review from user 15 and from team 5, which user 15 belongs to,
// so the review-request count of issue 20 should be 1
opts: SearchOptions{
ReviewRequestedID: optional.Some(int64(15)),
},
expectedIDs: []int64{12, 20},
},
{
// user 20 has already approved issue 20, so nothing is returned
opts: SearchOptions{
ReviewRequestedID: optional.Some(int64(20)),
},
expectedIDs: []int64{},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueIsPull(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
IsPull: optional.Some(false),
},
[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
},
{
SearchOptions{
IsPull: optional.Some(true),
},
[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueIsClosed(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
IsClosed: optional.Some(false),
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
},
{
SearchOptions{
IsClosed: optional.Some(true),
},
[]int64{5, 4},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueIsArchived(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
IsArchived: optional.Some(false),
},
[]int64{22, 21, 17, 16, 15, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
},
{
SearchOptions{
IsArchived: optional.Some(true),
},
[]int64{14},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueByMilestoneID(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
MilestoneIDs: []int64{1},
},
[]int64{2},
},
{
SearchOptions{
MilestoneIDs: []int64{3},
},
[]int64{3},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueByLabelID(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
IncludedLabelIDs: []int64{1},
},
[]int64{2, 1},
},
{
SearchOptions{
IncludedLabelIDs: []int64{4},
},
[]int64{2},
},
{
SearchOptions{
ExcludedLabelIDs: []int64{1},
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueByTime(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
UpdatedAfterUnix: optional.Some(int64(0)),
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueWithOrder(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
SortBy: internal.SortByCreatedAsc,
},
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueInProject(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
}{
{
SearchOptions{
ProjectID: optional.Some(int64(1)),
},
[]int64{5, 3, 2, 1},
},
{
SearchOptions{
ProjectColumnID: optional.Some(int64(1)),
},
[]int64{1},
},
{
SearchOptions{
ProjectColumnID: optional.Some(int64(0)), // issues in the default column
},
[]int64{2},
},
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
}
}
func searchIssueWithPaginator(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
expectedTotal int64
}{
{
SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
},
[]int64{22, 21, 17, 16, 15},
22,
},
}
for _, test := range tests {
issueIDs, total, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
assert.Equal(t, test.expectedTotal, total)
}
}
func searchIssueWithAnyAssignee(t *testing.T) {
tests := []struct {
opts SearchOptions
expectedIDs []int64
expectedTotal int64
}{
{
SearchOptions{
AssigneeID: "(any)",
},
[]int64{17, 6, 1},
3,
},
}
for _, test := range tests {
issueIDs, total, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
assert.Equal(t, test.expectedTotal, total)
}
}

View File

@@ -0,0 +1,48 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"context"
"errors"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/internal"
)
// Indexer defines an interface to index issue contents
type Indexer interface {
internal.Indexer
Index(ctx context.Context, issue ...*IndexerData) error
Delete(ctx context.Context, ids ...int64) error
Search(ctx context.Context, options *SearchOptions) (*SearchResult, error)
SupportedSearchModes() []indexer.SearchMode
}
// NewDummyIndexer returns a dummy indexer
func NewDummyIndexer() Indexer {
return &dummyIndexer{
Indexer: internal.NewDummyIndexer(),
}
}
type dummyIndexer struct {
internal.Indexer
}
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
return nil
}
func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error {
return errors.New("indexer is not ready")
}
func (d *dummyIndexer) Delete(_ context.Context, _ ...int64) error {
return errors.New("indexer is not ready")
}
func (d *dummyIndexer) Search(_ context.Context, _ *SearchOptions) (*SearchResult, error) {
return nil, errors.New("indexer is not ready")
}

View File

@@ -0,0 +1,160 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"strconv"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/timeutil"
)
// IndexerData is the data stored in the issue indexer
type IndexerData struct {
ID int64 `json:"id"`
RepoID int64 `json:"repo_id"`
IsPublic bool `json:"is_public"` // If the repo is public
// Fields used for keyword searching
Title string `json:"title"`
Content string `json:"content"`
Comments []string `json:"comments"`
// Fields used for filtering
IsPull bool `json:"is_pull"`
IsClosed bool `json:"is_closed"`
IsArchived bool `json:"is_archived"`
LabelIDs []int64 `json:"label_ids"`
NoLabel bool `json:"no_label"` // True if LabelIDs is empty
MilestoneID int64 `json:"milestone_id"`
ProjectID int64 `json:"project_id"`
ProjectColumnID int64 `json:"project_board_id"` // the key should be kept as project_board_id to keep compatible
PosterID int64 `json:"poster_id"`
AssigneeID int64 `json:"assignee_id"`
MentionIDs []int64 `json:"mention_ids"`
ReviewedIDs []int64 `json:"reviewed_ids"`
ReviewRequestedIDs []int64 `json:"review_requested_ids"`
SubscriberIDs []int64 `json:"subscriber_ids"`
UpdatedUnix timeutil.TimeStamp `json:"updated_unix"`
// Fields used for sorting
// UpdatedUnix is both used for filtering and sorting.
// ID is used for sorting too, to make the sorting stable.
CreatedUnix timeutil.TimeStamp `json:"created_unix"`
DeadlineUnix timeutil.TimeStamp `json:"deadline_unix"`
CommentCount int64 `json:"comment_count"`
}
// Match represents one search result
type Match struct {
ID int64 `json:"id"`
Score float64 `json:"score"`
}
// SearchResult represents search results
type SearchResult struct {
Total int64
Hits []Match
}
// SearchOptions represents search options.
//
// It has a slightly different design from database query options.
// In database query options, a field is never a pointer, so it can be confusing when it has a zero value:
// do you want to find data with a field value of 0, or did you not specify the field in the options at all?
// To avoid this confusion, db introduced db.NoConditionID(-1).
// So a zero value means the field is not specified in the search options, and db.NoConditionID means "== 0" or "id NOT IN (SELECT id FROM ...)".
// It's still not ideal, and it has trapped developers many times.
// And sometimes -1 could be a valid value, like negative label IDs, which indicate exclusion.
// Since db.NoConditionID is for "db" (the package name is db), it makes sense not to use it in the indexer:
// why should the bleve/elasticsearch/meilisearch indexers need to know about db.NoConditionID?
// So in SearchOptions we use optional values for fields which may be unspecified,
// and always use the value to filter when it is set, even if it is zero or negative.
// This handles almost all cases; if there is an exception, we can add a new field, like NoLabelOnly.
// Unfortunately, we still use db for the indexer and have to convert between db.NoConditionID and unset values for legacy reasons.
type SearchOptions struct {
Keyword string // keyword to search
SearchMode indexer.SearchModeType
RepoIDs []int64 // repository IDs which the issues belong to
AllPublic bool // whether to include all public repositories
IsPull optional.Option[bool] // whether the issues are pull requests
IsClosed optional.Option[bool] // whether the issues are closed
IsArchived optional.Option[bool] // whether the repo is archived
IncludedLabelIDs []int64 // labels the issues have
ExcludedLabelIDs []int64 // labels the issues don't have
IncludedAnyLabelIDs []int64 // labels of which the issues have at least one; ignored if IncludedLabelIDs is not empty. It's an uncommon filter, but it has been supported accidentally by issues.IssuesOptions.IncludedLabelNames.
NoLabelOnly bool // if true, match only issues with no label; IncludedLabelIDs, ExcludedLabelIDs and IncludedAnyLabelIDs will be ignored
MilestoneIDs []int64 // milestones the issues have
ProjectID optional.Option[int64] // project the issues belong to
ProjectColumnID optional.Option[int64] // project column the issues belong to
PosterID string // poster of the issues, "(none)" or "(any)" or a user ID
AssigneeID string // assignee of the issues, "(none)" or "(any)" or a user ID
MentionID optional.Option[int64] // mentioned user of the issues
ReviewedID optional.Option[int64] // reviewer of the issues
ReviewRequestedID optional.Option[int64] // requested reviewer of the issues
SubscriberID optional.Option[int64] // subscriber of the issues
UpdatedAfterUnix optional.Option[int64]
UpdatedBeforeUnix optional.Option[int64]
Paginator *db.ListOptions
SortBy SortBy // sort by field
}
// Copy returns a copy of the options.
// Be careful, it's not a deep copy, so `SearchOptions.RepoIDs = {...}` is OK while `SearchOptions.RepoIDs[0] = ...` is not.
func (o *SearchOptions) Copy(edit ...func(options *SearchOptions)) *SearchOptions {
if o == nil {
return nil
}
v := *o
for _, e := range edit {
e(&v)
}
return &v
}
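// A minimal usage sketch (the field values here are hypothetical): Copy derives
// narrowed options without mutating the original, and the optional fields show
// the nil-vs-value convention described above.
//
//	base := &SearchOptions{Keyword: "crash", IsPull: optional.Some(false)}
//	closed := base.Copy(func(o *SearchOptions) { o.IsClosed = optional.Some(true) })
//	// base.IsClosed is still unspecified; only `closed` filters on it.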
// IsKeywordNumeric reports whether the keyword is a plain integer; used for optimized issue-index-based search
func (o *SearchOptions) IsKeywordNumeric() bool {
_, err := strconv.Atoi(o.Keyword)
return err == nil
}
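// For illustration (hypothetical inputs): IsKeywordNumeric is true for "123"
// and "-1", but false for "#123", "1.5" or "issue 123", since strconv.Atoi
// accepts only an optionally signed decimal integer.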
type SortBy string
const (
SortByCreatedDesc SortBy = "-created_unix"
SortByUpdatedDesc SortBy = "-updated_unix"
SortByCommentsDesc SortBy = "-comment_count"
SortByDeadlineDesc SortBy = "-deadline_unix"
SortByCreatedAsc SortBy = "created_unix"
SortByUpdatedAsc SortBy = "updated_unix"
SortByCommentsAsc SortBy = "comment_count"
SortByDeadlineAsc SortBy = "deadline_unix"
// Unsupported sort types which are supported by issues.IssuesOptions.SortType:
//
// - "priorityrepo":
// It's impossible to support it in the indexer.
// It is based on the repository specified in the request, so we cannot add a static field to the indexer.
// If we did something like querying the issues in the specified repository first and then appending the others,
// it would break the pagination.
//
// - "project-column-sorting":
// It would be possible to support it by adding project.ProjectIssue.Sorting to the indexer,
// but what if the issue belongs to multiple projects?
// Since keyword search is not supported on the project page anyway, we don't need to support it.
)

View File

@@ -0,0 +1,759 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
// This package contains tests for issues indexer modules.
// All the code in this package is only used for testing.
// Do not put any production code in this package to avoid it being included in the final binary.
package tests
import (
"fmt"
"slices"
"testing"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/timeutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestIndexer(t *testing.T, indexer internal.Indexer) {
_, err := indexer.Init(t.Context())
require.NoError(t, err)
require.NoError(t, indexer.Ping(t.Context()))
var (
ids []int64
data = map[int64]*internal.IndexerData{}
)
{
d := generateDefaultIndexerData()
for _, v := range d {
ids = append(ids, v.ID)
data[v.ID] = v
}
require.NoError(t, indexer.Index(t.Context(), d...))
waitData(t, indexer, int64(len(data)))
}
defer func() {
require.NoError(t, indexer.Delete(t.Context(), ids...))
}()
for _, c := range cases {
t.Run(c.Name, func(t *testing.T) {
if len(c.ExtraData) > 0 {
require.NoError(t, indexer.Index(t.Context(), c.ExtraData...))
for _, v := range c.ExtraData {
data[v.ID] = v
}
waitData(t, indexer, int64(len(data)))
defer func() {
for _, v := range c.ExtraData {
require.NoError(t, indexer.Delete(t.Context(), v.ID))
delete(data, v.ID)
}
waitData(t, indexer, int64(len(data)))
}()
}
result, err := indexer.Search(t.Context(), c.SearchOptions)
require.NoError(t, err)
if c.Expected != nil {
c.Expected(t, data, result)
} else {
ids := make([]int64, 0, len(result.Hits))
for _, hit := range result.Hits {
ids = append(ids, hit.ID)
}
assert.Equal(t, c.ExpectedIDs, ids)
assert.Equal(t, c.ExpectedTotal, result.Total)
}
// test counting
c.SearchOptions.Paginator = &db.ListOptions{PageSize: 0}
countResult, err := indexer.Search(t.Context(), c.SearchOptions)
require.NoError(t, err)
assert.Empty(t, countResult.Hits)
assert.Equal(t, result.Total, countResult.Total)
})
}
}
var cases = []*testIndexerCase{
{
Name: "default",
SearchOptions: &internal.SearchOptions{},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
},
},
{
Name: "empty",
SearchOptions: &internal.SearchOptions{
Keyword: "f1dfac73-fda6-4a6b-b8a4-2408fcb8ef69",
},
ExpectedIDs: []int64{},
ExpectedTotal: 0,
},
{
Name: "with limit",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
assert.Equal(t, len(data), int(result.Total))
},
},
{
Name: "Keyword",
ExtraData: []*internal.IndexerData{
{ID: 1000, Title: "hi hello world"},
{ID: 1001, Content: "hi hello world"},
{ID: 1002, Comments: []string{"hi", "hello world"}},
},
SearchOptions: &internal.SearchOptions{
Keyword: "hello",
},
ExpectedIDs: []int64{1002, 1001, 1000},
ExpectedTotal: 3,
},
{
Name: "RepoIDs",
ExtraData: []*internal.IndexerData{
{ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false},
{ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false},
{ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true},
{ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true},
{ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true},
{ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false},
{ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false},
},
SearchOptions: &internal.SearchOptions{
Keyword: "hello",
RepoIDs: []int64{1, 4},
},
ExpectedIDs: []int64{1006, 1002, 1001},
ExpectedTotal: 3,
},
{
Name: "RepoIDs and AllPublic",
ExtraData: []*internal.IndexerData{
{ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false},
{ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false},
{ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true},
{ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true},
{ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true},
{ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false},
{ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false},
},
SearchOptions: &internal.SearchOptions{
Keyword: "hello",
RepoIDs: []int64{1, 4},
AllPublic: true,
},
ExpectedIDs: []int64{1006, 1005, 1004, 1003, 1002, 1001},
ExpectedTotal: 6,
},
{
Name: "issue only",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
IsPull: optional.Some(false),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.False(t, data[v.ID].IsPull)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsPull }), result.Total)
},
},
{
Name: "pull only",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
IsPull: optional.Some(true),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.True(t, data[v.ID].IsPull)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsPull }), result.Total)
},
},
{
Name: "opened only",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
IsClosed: optional.Some(false),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.False(t, data[v.ID].IsClosed)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsClosed }), result.Total)
},
},
{
Name: "closed only",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
IsClosed: optional.Some(true),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.True(t, data[v.ID].IsClosed)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsClosed }), result.Total)
},
},
{
Name: "labels",
ExtraData: []*internal.IndexerData{
{ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}},
{ID: 1001, Title: "hello b", LabelIDs: []int64{2000, 2001}},
{ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}},
{ID: 1003, Title: "hello d", LabelIDs: []int64{2000}},
{ID: 1004, Title: "hello e", LabelIDs: []int64{}},
},
SearchOptions: &internal.SearchOptions{
Keyword: "hello",
IncludedLabelIDs: []int64{2000, 2001},
ExcludedLabelIDs: []int64{2003},
},
ExpectedIDs: []int64{1001, 1000},
ExpectedTotal: 2,
},
{
Name: "include any labels",
ExtraData: []*internal.IndexerData{
{ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}},
{ID: 1001, Title: "hello b", LabelIDs: []int64{2001}},
{ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}},
{ID: 1003, Title: "hello d", LabelIDs: []int64{2002}},
{ID: 1004, Title: "hello e", LabelIDs: []int64{}},
},
SearchOptions: &internal.SearchOptions{
Keyword: "hello",
IncludedAnyLabelIDs: []int64{2001, 2002},
ExcludedLabelIDs: []int64{2003},
},
ExpectedIDs: []int64{1003, 1001, 1000},
ExpectedTotal: 3,
},
{
Name: "MilestoneIDs",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
MilestoneIDs: []int64{1, 2, 6},
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Contains(t, []int64{1, 2, 6}, data[v.ID].MilestoneID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.MilestoneID == 1 || v.MilestoneID == 2 || v.MilestoneID == 6
}), result.Total)
},
},
{
Name: "no MilestoneIDs",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
MilestoneIDs: []int64{0},
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(0), data[v.ID].MilestoneID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.MilestoneID == 0
}), result.Total)
},
},
{
Name: "ProjectID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
ProjectID: optional.Some(int64(1)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(1), data[v.ID].ProjectID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.ProjectID == 1
}), result.Total)
},
},
{
Name: "no ProjectID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
ProjectID: optional.Some(int64(0)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(0), data[v.ID].ProjectID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.ProjectID == 0
}), result.Total)
},
},
{
Name: "ProjectColumnID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
ProjectColumnID: optional.Some(int64(1)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(1), data[v.ID].ProjectColumnID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.ProjectColumnID == 1
}), result.Total)
},
},
{
Name: "no ProjectColumnID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
ProjectColumnID: optional.Some(int64(0)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(0), data[v.ID].ProjectColumnID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.ProjectColumnID == 0
}), result.Total)
},
},
{
Name: "PosterID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
PosterID: "1",
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(1), data[v.ID].PosterID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.PosterID == 1
}), result.Total)
},
},
{
Name: "AssigneeID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
AssigneeID: "1",
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(1), data[v.ID].AssigneeID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.AssigneeID == 1
}), result.Total)
},
},
{
Name: "no AssigneeID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
AssigneeID: "(none)",
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Equal(t, int64(0), data[v.ID].AssigneeID)
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.AssigneeID == 0
}), result.Total)
},
},
{
Name: "MentionID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
MentionID: optional.Some(int64(1)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Contains(t, data[v.ID].MentionIDs, int64(1))
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return slices.Contains(v.MentionIDs, 1)
}), result.Total)
},
},
{
Name: "ReviewedID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
ReviewedID: optional.Some(int64(1)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Contains(t, data[v.ID].ReviewedIDs, int64(1))
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return slices.Contains(v.ReviewedIDs, 1)
}), result.Total)
},
},
{
Name: "ReviewRequestedID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
ReviewRequestedID: optional.Some(int64(1)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Contains(t, data[v.ID].ReviewRequestedIDs, int64(1))
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return slices.Contains(v.ReviewRequestedIDs, 1)
}), result.Total)
},
},
{
Name: "SubscriberID",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
SubscriberID: optional.Some(int64(1)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.Contains(t, data[v.ID].SubscriberIDs, int64(1))
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return slices.Contains(v.SubscriberIDs, 1)
}), result.Total)
},
},
{
Name: "updated",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptions{
PageSize: 5,
},
UpdatedAfterUnix: optional.Some(int64(20)),
UpdatedBeforeUnix: optional.Some(int64(30)),
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 5)
for _, v := range result.Hits {
assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, int64(20))
assert.LessOrEqual(t, data[v.ID].UpdatedUnix, int64(30))
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return data[v.ID].UpdatedUnix >= 20 && data[v.ID].UpdatedUnix <= 30
}), result.Total)
},
},
{
Name: "SortByCreatedDesc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByCreatedDesc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.GreaterOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix)
}
}
},
},
{
Name: "SortByUpdatedDesc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByUpdatedDesc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix)
}
}
},
},
{
Name: "SortByCommentsDesc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByCommentsDesc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.GreaterOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount)
}
}
},
},
{
Name: "SortByDeadlineDesc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByDeadlineDesc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.GreaterOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix)
}
}
},
},
{
Name: "SortByCreatedAsc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByCreatedAsc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.LessOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix)
}
}
},
},
{
Name: "SortByUpdatedAsc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByUpdatedAsc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.LessOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix)
}
}
},
},
{
Name: "SortByCommentsAsc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByCommentsAsc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.LessOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount)
}
}
},
},
{
Name: "SortByDeadlineAsc",
SearchOptions: &internal.SearchOptions{
Paginator: &db.ListOptionsAll,
SortBy: internal.SortByDeadlineAsc,
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, len(data))
assert.Equal(t, len(data), int(result.Total))
for i, v := range result.Hits {
if i < len(result.Hits)-1 {
assert.LessOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix)
}
}
},
},
{
Name: "SearchAnyAssignee",
SearchOptions: &internal.SearchOptions{
AssigneeID: "(any)",
},
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
assert.Len(t, result.Hits, 180)
for _, v := range result.Hits {
assert.GreaterOrEqual(t, data[v.ID].AssigneeID, int64(1))
}
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
return v.AssigneeID >= 1
}), result.Total)
},
},
}
type testIndexerCase struct {
Name string
ExtraData []*internal.IndexerData
SearchOptions *internal.SearchOptions
Expected func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) // if nil, use ExpectedIDs, ExpectedTotal
ExpectedIDs []int64
ExpectedTotal int64
}
func generateDefaultIndexerData() []*internal.IndexerData {
var id int64
var data []*internal.IndexerData
for repoID := int64(1); repoID <= 10; repoID++ {
for issueIndex := int64(1); issueIndex <= 20; issueIndex++ {
id++
comments := make([]string, id%4)
for i := range comments {
comments[i] = fmt.Sprintf("comment%d", i)
}
labelIDs := make([]int64, id%5)
for i := range labelIDs {
labelIDs[i] = int64(i) + 1 // LabelID should not be 0
}
mentionIDs := make([]int64, id%6)
for i := range mentionIDs {
mentionIDs[i] = int64(i) + 1 // MentionID should not be 0
}
reviewedIDs := make([]int64, id%7)
for i := range reviewedIDs {
reviewedIDs[i] = int64(i) + 1 // ReviewID should not be 0
}
reviewRequestedIDs := make([]int64, id%8)
for i := range reviewRequestedIDs {
reviewRequestedIDs[i] = int64(i) + 1 // ReviewRequestedID should not be 0
}
subscriberIDs := make([]int64, id%9)
for i := range subscriberIDs {
subscriberIDs[i] = int64(i) + 1 // SubscriberID should not be 0
}
data = append(data, &internal.IndexerData{
ID: id,
RepoID: repoID,
IsPublic: repoID%2 == 0,
Title: fmt.Sprintf("issue%d of repo%d", issueIndex, repoID),
Content: fmt.Sprintf("content%d", issueIndex),
Comments: comments,
IsPull: issueIndex%2 == 0,
IsClosed: issueIndex%3 == 0,
LabelIDs: labelIDs,
NoLabel: len(labelIDs) == 0,
MilestoneID: issueIndex % 4,
ProjectID: issueIndex % 5,
ProjectColumnID: issueIndex % 6,
PosterID: id%10 + 1, // PosterID should not be 0
AssigneeID: issueIndex % 10,
MentionIDs: mentionIDs,
ReviewedIDs: reviewedIDs,
ReviewRequestedIDs: reviewRequestedIDs,
SubscriberIDs: subscriberIDs,
UpdatedUnix: timeutil.TimeStamp(id + issueIndex),
CreatedUnix: timeutil.TimeStamp(id),
DeadlineUnix: timeutil.TimeStamp(id + issueIndex + repoID),
CommentCount: int64(len(comments)),
})
}
}
return data
}
func countIndexerData(data map[int64]*internal.IndexerData, f func(v *internal.IndexerData) bool) int64 {
var count int64
for _, v := range data {
if f(v) {
count++
}
}
return count
}
// waitData waits for the indexer to index all data.
// Some engines like Elasticsearch index data asynchronously, so we need to wait for a while.
func waitData(t *testing.T, indexer internal.Indexer, total int64) {
assert.Eventually(t, func() bool {
result, err := indexer.Search(t.Context(), &internal.SearchOptions{Paginator: &db.ListOptions{}})
require.NoError(t, err)
return result.Total == total
}, 10*time.Second, 100*time.Millisecond, "expected total=%d", total)
}

View File

@@ -0,0 +1,317 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package meilisearch
import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"code.gitea.io/gitea/modules/indexer"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"github.com/meilisearch/meilisearch-go"
)
const (
issueIndexerLatestVersion = 4
// TODO: make this configurable if necessary
maxTotalHits = 10000
)
// ErrMalformedResponse is never expected, as we initialize the indexer ourselves and thus define the types.
var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")
var _ internal.Indexer = &Indexer{}
// Indexer implements Indexer interface
type Indexer struct {
inner *inner_meilisearch.Indexer
indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
}
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
return indexer.SearchModesExactWords()
}
// NewIndexer creates a new meilisearch indexer
func NewIndexer(url, apiKey, indexerName string) *Indexer {
settings := &meilisearch.Settings{
// The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
// So even if we specify the sort order, it may not be respected, because the priority of "sort" is so low.
// We therefore override the ranking rules to make sure the sort order is respected.
// See https://www.meilisearch.com/docs/learn/core_concepts/relevancy
RankingRules: []string{"sort", // make sure "sort" has the highest priority
"words", "typo", "proximity", "attribute", "exactness"},
SearchableAttributes: []string{
"title",
"content",
"comments",
},
DisplayedAttributes: []string{
"id",
"title",
"content",
"comments",
},
FilterableAttributes: []string{
"repo_id",
"is_public",
"is_pull",
"is_closed",
"is_archived",
"label_ids",
"no_label",
"milestone_id",
"project_id",
"project_board_id",
"poster_id",
"assignee_id",
"mention_ids",
"reviewed_ids",
"review_requested_ids",
"subscriber_ids",
"updated_unix",
},
SortableAttributes: []string{
"updated_unix",
"created_unix",
"deadline_unix",
"comment_count",
"id",
},
Pagination: &meilisearch.Pagination{
MaxTotalHits: maxTotalHits,
},
}
inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings)
indexer := &Indexer{
inner: inner,
Indexer: inner,
}
return indexer
}
// Index will save the index data
func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
if len(issues) == 0 {
return nil
}
for _, issue := range issues {
_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue)
if err != nil {
return err
}
}
// TODO: bulk send index data
return nil
}
// Delete deletes indexed documents by their ids
func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
if len(ids) == 0 {
return nil
}
for _, id := range ids {
_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10))
if err != nil {
return err
}
}
// TODO: bulk send deletes
return nil
}
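// A possible bulk variant of Delete, as a sketch only (it assumes
// meilisearch-go's Index.DeleteDocuments, which accepts a batch of document IDs):
//
//	docIDs := make([]string, 0, len(ids))
//	for _, id := range ids {
//		docIDs = append(docIDs, strconv.FormatInt(id, 10))
//	}
//	_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocuments(docIDs)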
// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
query := inner_meilisearch.FilterAnd{}
if len(options.RepoIDs) > 0 {
q := &inner_meilisearch.FilterOr{}
q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...))
if options.AllPublic {
q.Or(inner_meilisearch.NewFilterEq("is_public", true))
}
query.And(q)
}
if options.IsPull.Has() {
query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value()))
}
if options.IsClosed.Has() {
query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value()))
}
if options.IsArchived.Has() {
query.And(inner_meilisearch.NewFilterEq("is_archived", options.IsArchived.Value()))
}
if options.NoLabelOnly {
query.And(inner_meilisearch.NewFilterEq("no_label", true))
} else {
if len(options.IncludedLabelIDs) > 0 {
q := &inner_meilisearch.FilterAnd{}
for _, labelID := range options.IncludedLabelIDs {
q.And(inner_meilisearch.NewFilterEq("label_ids", labelID))
}
query.And(q)
} else if len(options.IncludedAnyLabelIDs) > 0 {
query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...))
}
if len(options.ExcludedLabelIDs) > 0 {
q := &inner_meilisearch.FilterAnd{}
for _, labelID := range options.ExcludedLabelIDs {
q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID)))
}
query.And(q)
}
}
if len(options.MilestoneIDs) > 0 {
query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...))
}
if options.ProjectID.Has() {
query.And(inner_meilisearch.NewFilterEq("project_id", options.ProjectID.Value()))
}
if options.ProjectColumnID.Has() {
query.And(inner_meilisearch.NewFilterEq("project_board_id", options.ProjectColumnID.Value()))
}
if options.PosterID != "" {
// "(none)" becomes 0, it means no poster
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
query.And(inner_meilisearch.NewFilterEq("poster_id", posterIDInt64))
}
if options.AssigneeID != "" {
if options.AssigneeID == "(any)" {
query.And(inner_meilisearch.NewFilterGte("assignee_id", 1))
} else {
// "(none)" becomes 0, it means no assignee
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
query.And(inner_meilisearch.NewFilterEq("assignee_id", assigneeIDInt64))
}
}
if options.MentionID.Has() {
query.And(inner_meilisearch.NewFilterEq("mention_ids", options.MentionID.Value()))
}
if options.ReviewedID.Has() {
query.And(inner_meilisearch.NewFilterEq("reviewed_ids", options.ReviewedID.Value()))
}
if options.ReviewRequestedID.Has() {
query.And(inner_meilisearch.NewFilterEq("review_requested_ids", options.ReviewRequestedID.Value()))
}
if options.SubscriberID.Has() {
query.And(inner_meilisearch.NewFilterEq("subscriber_ids", options.SubscriberID.Value()))
}
if options.UpdatedAfterUnix.Has() {
query.And(inner_meilisearch.NewFilterGte("updated_unix", options.UpdatedAfterUnix.Value()))
}
if options.UpdatedBeforeUnix.Has() {
query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value()))
}
if options.SortBy == "" {
options.SortBy = internal.SortByCreatedAsc
}
sortBy := []string{
parseSortBy(options.SortBy),
"id:desc",
}
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
counting := limit == 0
if counting {
// If the limit is set to 0, it defaults to 20, and -1 is not allowed.
// See https://www.meilisearch.com/docs/reference/api/search#limit
// So set limit to 1 to make the cost as low as possible, then clear the result before returning.
limit = 1
}
keyword := options.Keyword // default to match "words"
if options.SearchMode == indexer.SearchModeExact {
// https://www.meilisearch.com/docs/reference/api/search#phrase-search
keyword = doubleQuoteKeyword(keyword)
}
searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
Filter: query.Statement(),
Limit: int64(limit),
Offset: int64(skip),
Sort: sortBy,
MatchingStrategy: "all",
})
if err != nil {
return nil, err
}
if counting {
searchRes.Hits = nil
}
hits, err := convertHits(searchRes)
if err != nil {
return nil, err
}
return &internal.SearchResult{
Total: searchRes.EstimatedTotalHits,
Hits: hits,
}, nil
}
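// A counting-only call, as a hypothetical usage sketch: a paginator with
// PageSize 0 makes Search hit the `counting` branch above, so it returns only
// the total and clears the hits.
//
//	res, _ := idx.Search(ctx, &internal.SearchOptions{
//		Keyword:   "hello",
//		Paginator: &db.ListOptions{PageSize: 0},
//	})
//	// res.Hits is empty; res.Total is the number of matches.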
func parseSortBy(sortBy internal.SortBy) string {
field := strings.TrimPrefix(string(sortBy), "-")
if strings.HasPrefix(string(sortBy), "-") {
return field + ":desc"
}
return field + ":asc"
}
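// For illustration, parseSortBy maps the internal SortBy convention (a leading
// "-" means descending) to meilisearch's "field:direction" syntax:
//
//	parseSortBy(internal.SortByUpdatedDesc) // "updated_unix:desc"
//	parseSortBy(internal.SortByCreatedAsc)  // "created_unix:asc"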
func doubleQuoteKeyword(k string) string {
kp := strings.Split(k, " ")
parts := 0
for i := range kp {
part := strings.Trim(kp[i], "\"")
if part != "" {
kp[parts] = fmt.Sprintf(`"%s"`, part)
parts++
}
}
return strings.Join(kp[:parts], " ")
}
func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) {
hits := make([]internal.Match, 0, len(searchRes.Hits))
for _, hit := range searchRes.Hits {
hit, ok := hit.(map[string]any)
if !ok {
return nil, ErrMalformedResponse
}
issueID, ok := hit["id"].(float64)
if !ok {
return nil, ErrMalformedResponse
}
hits = append(hits, internal.Match{
ID: int64(issueID),
})
}
return hits, nil
}

View File

@@ -0,0 +1,86 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package meilisearch
import (
"fmt"
"net/http"
"os"
"testing"
"time"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/indexer/issues/internal/tests"
"github.com/meilisearch/meilisearch-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMeilisearchIndexer(t *testing.T) {
// The meilisearch instance started by pull-db-tests.yml > test-unit > services > meilisearch
url := "http://meilisearch:7700"
key := "" // auth has been disabled in test environment
if os.Getenv("CI") == "" {
// Make it possible to run tests against a local meilisearch instance
url = os.Getenv("TEST_MEILISEARCH_URL")
if url == "" {
t.Skip("TEST_MEILISEARCH_URL not set and not running in CI")
return
}
key = os.Getenv("TEST_MEILISEARCH_KEY")
}
require.Eventually(t, func() bool {
resp, err := http.Get(url)
return err == nil && resp.StatusCode == http.StatusOK
}, time.Minute, time.Second, "Expected meilisearch to be up")
indexer := NewIndexer(url, key, fmt.Sprintf("test_meilisearch_indexer_%d", time.Now().Unix()))
defer indexer.Close()
tests.TestIndexer(t, indexer)
}
func TestConvertHits(t *testing.T) {
_, err := convertHits(&meilisearch.SearchResponse{
Hits: []any{"aa", "bb", "cc", "dd"},
})
assert.ErrorIs(t, err, ErrMalformedResponse)
validResponse := &meilisearch.SearchResponse{
Hits: []any{
map[string]any{
"id": float64(11),
"title": "a title",
"content": "issue body with no match",
"comments": []any{"hey whats up?", "I'm currently bowling", "nice"},
},
map[string]any{
"id": float64(22),
"title": "Bowling as title",
"content": "",
"comments": []any{},
},
map[string]any{
"id": float64(33),
"title": "Bowl-ing as fuzzy match",
"content": "",
"comments": []any{},
},
},
}
hits, err := convertHits(validResponse)
assert.NoError(t, err)
assert.Equal(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
}
func TestDoubleQuoteKeyword(t *testing.T) {
assert.Empty(t, doubleQuoteKeyword(""))
assert.Equal(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c"))
assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword("a  d g")) // consecutive spaces produce empty parts, which are dropped
assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`))
}

View File

@@ -0,0 +1,199 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package issues
import (
"context"
"errors"
"fmt"
"code.gitea.io/gitea/models/db"
issue_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/queue"
)
// getIssueIndexerData returns the indexer data of an issue and a bool value indicating whether the issue exists.
func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerData, bool, error) {
issue, err := issue_model.GetIssueByID(ctx, issueID)
if err != nil {
if issue_model.IsErrIssueNotExist(err) {
return nil, false, nil
}
return nil, false, err
}
// FIXME: what if users want to search for a review comment of a pull request?
// The comment type is CommentTypeCode or CommentTypeReview.
// But LoadDiscussComments only loads CommentTypeComment.
if err := issue.LoadDiscussComments(ctx); err != nil {
return nil, false, err
}
comments := make([]string, 0, len(issue.Comments))
for _, comment := range issue.Comments {
if comment.Content != "" {
// whatever the comment type is, index the content if it is not empty.
comments = append(comments, comment.Content)
}
}
if err := issue.LoadAttributes(ctx); err != nil {
return nil, false, err
}
labels := make([]int64, 0, len(issue.Labels))
for _, label := range issue.Labels {
labels = append(labels, label.ID)
}
mentionIDs, err := issue_model.GetIssueMentionIDs(ctx, issueID)
if err != nil {
return nil, false, err
}
var (
reviewedIDs []int64
reviewRequestedIDs []int64
)
{
reviews, err := issue_model.FindReviews(ctx, issue_model.FindReviewOptions{
ListOptions: db.ListOptionsAll,
IssueID: issueID,
OfficialOnly: false,
})
if err != nil {
return nil, false, err
}
reviewedIDsSet := make(container.Set[int64], len(reviews))
reviewRequestedIDsSet := make(container.Set[int64], len(reviews))
for _, review := range reviews {
if review.Type == issue_model.ReviewTypeRequest {
reviewRequestedIDsSet.Add(review.ReviewerID)
} else {
reviewedIDsSet.Add(review.ReviewerID)
}
}
reviewedIDs = reviewedIDsSet.Values()
reviewRequestedIDs = reviewRequestedIDsSet.Values()
}
subscriberIDs, err := issue_model.GetIssueWatchersIDs(ctx, issue.ID, true)
if err != nil {
return nil, false, err
}
var projectID int64
if issue.Project != nil {
projectID = issue.Project.ID
}
projectColumnID, err := issue.ProjectColumnID(ctx)
if err != nil {
return nil, false, err
}
return &internal.IndexerData{
ID: issue.ID,
RepoID: issue.RepoID,
IsPublic: !issue.Repo.IsPrivate,
Title: issue.Title,
Content: issue.Content,
Comments: comments,
IsPull: issue.IsPull,
IsClosed: issue.IsClosed,
IsArchived: issue.Repo.IsArchived,
LabelIDs: labels,
NoLabel: len(labels) == 0,
MilestoneID: issue.MilestoneID,
ProjectID: projectID,
ProjectColumnID: projectColumnID,
PosterID: issue.PosterID,
AssigneeID: issue.AssigneeID,
MentionIDs: mentionIDs,
ReviewedIDs: reviewedIDs,
ReviewRequestedIDs: reviewRequestedIDs,
SubscriberIDs: subscriberIDs,
UpdatedUnix: issue.UpdatedUnix,
CreatedUnix: issue.CreatedUnix,
DeadlineUnix: issue.DeadlineUnix,
CommentCount: int64(len(issue.Comments)),
}, true, nil
}
func updateRepoIndexer(ctx context.Context, repoID int64) error {
ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID)
if err != nil {
return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err)
}
for _, id := range ids {
if err := updateIssueIndexer(ctx, id); err != nil {
return err
}
}
return nil
}
func updateIssueIndexer(ctx context.Context, issueID int64) error {
return pushIssueIndexerQueue(ctx, &IndexerMetadata{ID: issueID})
}
func deleteRepoIssueIndexer(ctx context.Context, repoID int64) error {
ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID)
if err != nil {
return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err)
}
if len(ids) == 0 {
return nil
}
return pushIssueIndexerQueue(ctx, &IndexerMetadata{
IDs: ids,
IsDelete: true,
})
}
type keepRetryKey struct{}
// contextWithKeepRetry returns a context with a key indicating that the indexer should keep retrying.
// Please note that it's for background tasks only, and it should not be used for user requests, or it may cause blocking.
func contextWithKeepRetry(ctx context.Context) context.Context {
return context.WithValue(ctx, keepRetryKey{}, true)
}
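// A sketch of the intended call pattern for background tasks (the caller shown
// here is hypothetical): wrapping the context makes pushIssueIndexerQueue below
// keep retrying instead of giving up when the queue is full.
//
//	ctx := contextWithKeepRetry(shutdownCtx) // shutdownCtx: a long-lived background context
//	if err := updateRepoIndexer(ctx, repoID); err != nil {
//		log.Error("updateRepoIndexer: %v", err)
//	}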
func pushIssueIndexerQueue(ctx context.Context, data *IndexerMetadata) error {
if issueIndexerQueue == nil {
// Some unit tests will trigger indexing, but the queue is not initialized.
// It's OK to ignore it, but log a warning message in case it's not a unit test.
log.Warn("Trying to push %+v to issue indexer queue, but the queue is not initialized, it's OK if it's a unit test", data)
return nil
}
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
err := issueIndexerQueue.Push(data)
if errors.Is(err, queue.ErrAlreadyInQueue) {
return nil
}
if errors.Is(err, context.DeadlineExceeded) { // the queue is full
log.Warn("It seems that issue indexer is slow and the queue is full. Please check the issue indexer or increase the queue size.")
if ctx.Value(keepRetryKey{}) == nil {
return err
}
// It would be better to increase the queue size instead of retrying, but users may have ignored the previous warning message.
// However, even if it retries, it may still lose index data when there's a deadline in the context.
log.Debug("Retry to push %+v to issue indexer queue", data)
continue
}
return err
}
}