Refactor code_indexer to use a SearchOptions struct for PerformSearch (#29724)

This is similar to how it is already done for the issue_indexer.


---
*Sponsored by Kithara Software GmbH*
This commit is contained in:
6543 2024-03-16 11:32:45 +01:00 committed by GitHub
parent e0ea3811c4
commit 1262ff6734
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 86 additions and 39 deletions

View File

@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
return err return err
} }
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
return fmt.Errorf("Misformatted git cat-file output: %w", err) return fmt.Errorf("misformatted git cat-file output: %w", err)
} }
} }
@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {
// Search searches for files in the specified repo. // Search searches for files in the specified repo.
// Returns the matching file-paths // Returns the matching file-paths
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var ( var (
indexerQuery query.Query indexerQuery query.Query
keywordQuery query.Query keywordQuery query.Query
) )
if isFuzzy { if opts.IsKeywordFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(keyword) phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content" phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery keywordQuery = phraseQuery
} else { } else {
prefixQuery := bleve.NewPrefixQuery(keyword) prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
prefixQuery.FieldVal = "Content" prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery keywordQuery = prefixQuery
} }
if len(repoIDs) > 0 { if len(opts.RepoIDs) > 0 {
repoQueries := make([]query.Query, 0, len(repoIDs)) repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
for _, repoID := range repoIDs { for _, repoID := range opts.RepoIDs {
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID")) repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
} }
@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
// Save for reuse without language filter // Save for reuse without language filter
facetQuery := indexerQuery facetQuery := indexerQuery
if len(language) > 0 { if len(opts.Language) > 0 {
languageQuery := bleve.NewMatchQuery(language) languageQuery := bleve.NewMatchQuery(opts.Language)
languageQuery.FieldVal = "Language" languageQuery.FieldVal = "Language"
languageQuery.Analyzer = analyzer_keyword.Name languageQuery.Analyzer = analyzer_keyword.Name
@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
) )
} }
from := (page - 1) * pageSize from, pageSize := opts.GetSkipTake()
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
searchRequest.IncludeLocations = true searchRequest.IncludeLocations = true
if len(language) == 0 { if len(opts.Language) == 0 {
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
} }
@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
} }
searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10) searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
if len(language) > 0 { if len(opts.Language) > 0 {
// Use separate query to go get all language counts // Use separate query to go get all language counts
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false) facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}

View File

@ -281,18 +281,18 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
} }
// Search searches for codes and language stats by given conditions. // Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
searchType := esMultiMatchTypePhrasePrefix searchType := esMultiMatchTypePhrasePrefix
if isFuzzy { if opts.IsKeywordFuzzy {
searchType = esMultiMatchTypeBestFields searchType = esMultiMatchTypeBestFields
} }
kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType) kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
query := elastic.NewBoolQuery() query := elastic.NewBoolQuery()
query = query.Must(kwQuery) query = query.Must(kwQuery)
if len(repoIDs) > 0 { if len(opts.RepoIDs) > 0 {
repoStrs := make([]any, 0, len(repoIDs)) repoStrs := make([]any, 0, len(opts.RepoIDs))
for _, repoID := range repoIDs { for _, repoID := range opts.RepoIDs {
repoStrs = append(repoStrs, repoID) repoStrs = append(repoStrs, repoID)
} }
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...) repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
@ -300,16 +300,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
} }
var ( var (
start int start, pageSize = opts.GetSkipTake()
kw = "<em>" + keyword + "</em>" kw = "<em>" + opts.Keyword + "</em>"
aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc() aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
) )
if page > 0 { if len(opts.Language) == 0 {
start = (page - 1) * pageSize
}
if len(language) == 0 {
searchResult, err := b.inner.Client.Search(). searchResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()). Index(b.inner.VersionedIndexName()).
Aggregation("language", aggregation). Aggregation("language", aggregation).
@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
return convertResult(searchResult, kw, pageSize) return convertResult(searchResult, kw, pageSize)
} }
langQuery := elastic.NewMatchQuery("language", language) langQuery := elastic.NewMatchQuery("language", opts.Language)
countResult, err := b.inner.Client.Search(). countResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()). Index(b.inner.VersionedIndexName()).
Aggregation("language", aggregation). Aggregation("language", aggregation).

View File

@ -32,7 +32,7 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
needGenesis := len(status.CommitSha) == 0 needGenesis := len(status.CommitSha) == 0
if !needGenesis { if !needGenesis {
hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
needGenesis = len(stdout) == 0 needGenesis = len(stdout) == 0
} }

View File

@ -8,6 +8,7 @@ import (
"os" "os"
"testing" "testing"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/bleve"
@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
for _, kw := range keywords { for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) { t.Run(kw.Keyword, func(t *testing.T) {
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true) total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
RepoIDs: kw.RepoIDs,
Keyword: kw.Keyword,
Paginator: &db.ListOptions{
Page: 1,
PageSize: 10,
},
IsKeywordFuzzy: true,
})
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, kw.IDs, int(total)) assert.Len(t, kw.IDs, int(total))
assert.Len(t, langs, kw.Langs) assert.Len(t, langs, kw.Langs)

View File

@ -7,6 +7,7 @@ import (
"context" "context"
"fmt" "fmt"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/indexer/internal"
) )
@ -16,7 +17,17 @@ type Indexer interface {
internal.Indexer internal.Indexer
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error Delete(ctx context.Context, repoID int64) error
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
}
type SearchOptions struct {
RepoIDs []int64
Keyword string
Language string
IsKeywordFuzzy bool
db.Paginator
} }
// NewDummyIndexer returns a dummy indexer // NewDummyIndexer returns a dummy indexer
@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
return fmt.Errorf("indexer is not ready") return fmt.Errorf("indexer is not ready")
} }
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
return 0, nil, nil, fmt.Errorf("indexer is not ready") return 0, nil, nil, fmt.Errorf("indexer is not ready")
} }

View File

@ -32,6 +32,8 @@ type ResultLine struct {
type SearchResultLanguages = internal.SearchResultLanguages type SearchResultLanguages = internal.SearchResultLanguages
type SearchOptions = internal.SearchOptions
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) { func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
startIndex := selectionStartIndex startIndex := selectionStartIndex
numLinesBefore := 0 numLinesBefore := 0
@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
// PerformSearch perform a search on a repository // PerformSearch perform a search on a repository
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 // if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) { func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
if len(keyword) == 0 { if opts == nil || len(opts.Keyword) == 0 {
return 0, nil, nil, nil return 0, nil, nil, nil
} }
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy) total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts)
if err != nil { if err != nil {
return 0, nil, nil, err return 0, nil, nil, err
} }

View File

@ -6,6 +6,7 @@ package explore
import ( import (
"net/http" "net/http"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
code_indexer "code.gitea.io/gitea/modules/indexer/code" code_indexer "code.gitea.io/gitea/modules/indexer/code"
@ -76,7 +77,16 @@ func Code(ctx *context.Context) {
) )
if (len(repoIDs) > 0) || isAdmin { if (len(repoIDs) > 0) || isAdmin {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: repoIDs,
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil { if err != nil {
if code_indexer.IsAvailable(ctx) { if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err) ctx.ServerError("SearchResults", err)

View File

@ -6,6 +6,7 @@ package repo
import ( import (
"net/http" "net/http"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
code_indexer "code.gitea.io/gitea/modules/indexer/code" code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -41,8 +42,16 @@ func Search(ctx *context.Context) {
page = 1 page = 1
} }
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil { if err != nil {
if code_indexer.IsAvailable(ctx) { if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err) ctx.ServerError("SearchResults", err)

View File

@ -6,6 +6,7 @@ package user
import ( import (
"net/http" "net/http"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
code_indexer "code.gitea.io/gitea/modules/indexer/code" code_indexer "code.gitea.io/gitea/modules/indexer/code"
@ -74,7 +75,16 @@ func CodeSearch(ctx *context.Context) {
) )
if len(repoIDs) > 0 { if len(repoIDs) > 0 {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: repoIDs,
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil { if err != nil {
if code_indexer.IsAvailable(ctx) { if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err) ctx.ServerError("SearchResults", err)