Merge branch 'main' into fix-gzip

This commit is contained in:
wxiaoguang 2024-05-02 08:39:15 +08:00 committed by GitHub
commit bb293cf161
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 26 additions and 22 deletions

View File

@ -1456,7 +1456,7 @@ LEVEL = Info
;; Batch size to send for batched queues
;BATCH_LENGTH = 20
;;
;; Connection string for redis queues this will store the redis or redis-cluster connection string.
;; Connection string for redis queues this will store the redis (or Redis cluster) connection string.
;; When `TYPE` is `persistable-channel`, this provides a directory for the underlying leveldb
;; or additional options of the form `leveldb://path/to/db?option=value&....`, and will override `DATADIR`.
;CONN_STR = "redis://127.0.0.1:6379/0"
@ -1740,9 +1740,8 @@ LEVEL = Info
;; For "memory" only, GC interval in seconds, default is 60
;INTERVAL = 60
;;
;; For "redis", "redis-cluster" and "memcache", connection host address
;; redis: `redis://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
;; redis-cluster: `redis+cluster://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
;; For "redis" and "memcache", connection host address
;; redis: `redis://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s` (or `redis+cluster://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s` for a Redis cluster)
;; memcache: `127.0.0.1:11211`
;; twoqueue: `{"size":50000,"recent_ratio":0.25,"ghost_ratio":0.5}` or `50000`
;HOST =
@ -1772,15 +1771,14 @@ LEVEL = Info
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Either "memory", "file", "redis", "redis-cluster", "db", "mysql", "couchbase", "memcache" or "postgres"
;; Either "memory", "file", "redis", "db", "mysql", "couchbase", "memcache" or "postgres"
;; Default is "memory". "db" will reuse the configuration in [database]
;PROVIDER = memory
;;
;; Provider config options
;; memory: doesn't have any config yet
;; file: session file path, e.g. `data/sessions`
;; redis: `redis://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
;; redis-cluster: `redis+cluster://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
;; redis: `redis://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s` (or `redis+cluster://127.0.0.1:6379/0?pool_size=100&idle_timeout=180s` for a Redis cluster)
;; mysql: go-sql-driver/mysql dsn config string, e.g. `root:password@/session_table`
;PROVIDER_CONFIG = data/sessions ; Relative paths will be made absolute against _`AppWorkPath`_.
;;

View File

@ -492,7 +492,7 @@ Configuration at `[queue]` will set defaults for queues with overrides for indiv
- `DATADIR`: **queues/common**: Base DataDir for storing level queues. `DATADIR` for individual queues can be set in `queue.name` sections. Relative paths will be made absolute against `%(APP_DATA_PATH)s`.
- `LENGTH`: **100000**: Maximal queue size before channel queues block
- `BATCH_LENGTH`: **20**: Batch data before passing to the handler
- `CONN_STR`: **redis://127.0.0.1:6379/0**: Connection string for the redis queue type. For `redis-cluster` use `redis+cluster://127.0.0.1:6379/0`. Options can be set using query params. Similarly, LevelDB options can also be set using: **leveldb://relative/path?option=value** or **leveldb:///absolute/path?option=value**, and will override `DATADIR`
- `CONN_STR`: **redis://127.0.0.1:6379/0**: Connection string for the redis queue type. If you're running a Redis cluster, use `redis+cluster://127.0.0.1:6379/0`. Options can be set using query params. Similarly, LevelDB options can also be set using: **leveldb://relative/path?option=value** or **leveldb:///absolute/path?option=value**, and will override `DATADIR`
- `QUEUE_NAME`: **_queue**: The suffix for default redis and disk queue name. Individual queues will default to **`name`**`QUEUE_NAME` but can be overridden in the specific `queue.name` section.
- `SET_NAME`: **_unique**: The suffix that will be added to the default redis and disk queue `set` name for unique queues. Individual queues will default to **`name`**`QUEUE_NAME`_`SET_NAME`_ but can be overridden in the specific `queue.name` section.
- `MAX_WORKERS`: **(dynamic)**: Maximum number of worker go-routines for the queue. Default value is "CpuNum/2" clipped to between 1 and 10.
@ -777,11 +777,11 @@ and
## Cache (`cache`)
- `ADAPTER`: **memory**: Cache engine adapter, either `memory`, `redis`, `redis-cluster`, `twoqueue` or `memcache`. (`twoqueue` represents a size limited LRU cache.)
- `ADAPTER`: **memory**: Cache engine adapter, either `memory`, `redis`, `twoqueue` or `memcache`. (`twoqueue` represents a size limited LRU cache.)
- `INTERVAL`: **60**: Garbage Collection interval (sec), for memory and twoqueue cache only.
- `HOST`: **_empty_**: Connection string for `redis`, `redis-cluster` and `memcache`. For `twoqueue` sets configuration for the queue.
- `HOST`: **_empty_**: Connection string for `redis` and `memcache`. For `twoqueue` sets configuration for the queue.
- Redis: `redis://:macaron@127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
- Redis-cluster `redis+cluster://:macaron@127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
- For a Redis cluster: `redis+cluster://:macaron@127.0.0.1:6379/0?pool_size=100&idle_timeout=180s`
- Memcache: `127.0.0.1:9090;127.0.0.1:9091`
- TwoQueue LRU cache: `{"size":50000,"recent_ratio":0.25,"ghost_ratio":0.5}` or `50000` representing the maximum number of objects stored in the cache.
- `ITEM_TTL`: **16h**: Time to keep items in cache if not used, Setting it to -1 disables caching.
@ -793,7 +793,7 @@ and
## Session (`session`)
- `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, redis-cluster, db, mysql, couchbase, memcache, postgres\]. Setting `db` will reuse the configuration in `[database]`
- `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, db, mysql, couchbase, memcache, postgres\]. Setting `db` will reuse the configuration in `[database]`
- `PROVIDER_CONFIG`: **data/sessions**: For file, the root path; for db, empty (database config will be used); for others, the connection string. Relative paths will be made absolute against _`AppWorkPath`_.
- `COOKIE_SECURE`:**_empty_**: `true` or `false`. Enable this to force using HTTPS for all session access. If not set, it defaults to `true` if the ROOT_URL is an HTTPS URL.
- `COOKIE_NAME`: **i\_like\_gitea**: The name of the cookie used for the session ID.

View File

@ -39,8 +39,6 @@ import (
const (
unicodeNormalizeName = "unicodeNormalize"
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
)
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@ -245,7 +243,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
if opts.IsKeywordFuzzy {
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
phraseQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
}
if len(opts.RepoIDs) > 0 {

View File

@ -47,3 +47,15 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
return index, 0, nil
}
func GuessFuzzinessByKeyword(s string) int {
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
for _, r := range s {
if r >= 128 {
return 0
}
}
return min(2, len(s)/4)
}

View File

@ -35,11 +35,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}
const (
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
)
const maxBatchSize = 16
// IndexerData an update to the issue indexer
type IndexerData internal.IndexerData
@ -162,7 +158,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
if options.Keyword != "" {
fuzziness := 0
if options.IsFuzzyKeyword {
fuzziness = len(options.Keyword) / fuzzyDenominator
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{

View File

@ -28,6 +28,7 @@ func Search(ctx *context.Context) {
ctx.Data["Language"] = language
ctx.Data["IsFuzzy"] = isFuzzy
ctx.Data["PageIsViewCode"] = true
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
if keyword == "" {
ctx.HTML(http.StatusOK, tplSearch)
@ -86,7 +87,6 @@ func Search(ctx *context.Context) {
}
}
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
ctx.Data["Repo"] = ctx.Repo.Repository
ctx.Data["SearchResults"] = searchResults
ctx.Data["SearchResultLanguages"] = searchResultLanguages