Limit the max line length when parsing git grep output (#30418)

This commit is contained in:
wxiaoguang 2024-04-12 11:36:34 +08:00 committed by GitHub
parent 7af074dbee
commit f9fdac9809
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 4 deletions

View File

@ -10,6 +10,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"slices"
"strconv" "strconv"
"strings" "strings"
@ -27,6 +28,7 @@ type GrepOptions struct {
MaxResultLimit int MaxResultLimit int
ContextLineNumber int ContextLineNumber int
IsFuzzy bool IsFuzzy bool
MaxLineLength int // the maximum length of a line to parse; characters beyond this limit are truncated
} }
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) { func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
@ -71,10 +73,20 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
defer stdoutReader.Close() defer stdoutReader.Close()
isInBlock := false isInBlock := false
scanner := bufio.NewScanner(stdoutReader) rd := bufio.NewReaderSize(stdoutReader, util.IfZero(opts.MaxLineLength, 16*1024))
var res *GrepResult var res *GrepResult
for scanner.Scan() { for {
line := scanner.Text() lineBytes, isPrefix, err := rd.ReadLine()
if isPrefix {
lineBytes = slices.Clone(lineBytes)
for isPrefix && err == nil {
_, isPrefix, err = rd.ReadLine()
}
}
if len(lineBytes) == 0 && err != nil {
break
}
line := string(lineBytes) // copy into a string: lineBytes may alias the reader's internal buffer, which the next read overwrites
if !isInBlock { if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok { if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true isInBlock = true
@ -100,7 +112,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
res.LineCodes = append(res.LineCodes, lineCode) res.LineCodes = append(res.LineCodes, lineCode)
} }
} }
return scanner.Err() return nil
}, },
}) })
// git grep exits by cancel (killed), usually it is caused by the limit of results // git grep exits by cancel (killed), usually it is caused by the limit of results

View File

@ -41,6 +41,16 @@ func TestGrepSearch(t *testing.T) {
}, },
}, res) }, res)
res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1, MaxLineLength: 39})
assert.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] arg"},
},
}, res)
res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{}) res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, res, 0) assert.Len(t, res, 0)