From 34399cfd7a618198822ddb2d0052adc39e5568e4 Mon Sep 17 00:00:00 2001 From: sillyguodong <33891828+sillyguodong@users.noreply.github.com> Date: Sun, 12 Feb 2023 18:09:03 +0800 Subject: [PATCH] Make issue and code search support camel case (#22829) Fixes #22714 ### Changes: 1. Add a token filter which named "camelCase" between custom unicode token filter and "to_lower" token filter when add custom analyzer. ### Notice: If users want this feature to work, they should delete folder under {giteaPath}/data/indexers and restart application. Then application will create a new IndexMapping. ### Screenshots: ![image](https://user-images.githubusercontent.com/33891828/217715692-c18c41f2-57a1-4727-861c-470935c8e0c8.png) ### Others: I originally attempted to give users the ability to configure the "token_filters" in the "app.ini" file. But I found that if users does not strictly follow a right order to register "token_filters", they won't get the expected results. I think it is difficult to ask users to do this. So I finally give up this idea. --------- Co-authored-by: Jason Song Co-authored-by: Lunny Xiao --- modules/indexer/code/bleve.go | 5 +++-- modules/indexer/issues/bleve.go | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 6ee2639d3..e9085f410 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -27,6 +27,7 @@ import ( "github.com/blevesearch/bleve/v2" analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" @@ -107,7 +108,7 @@ func (d *RepoIndexerData) Type() string { const ( repoIndexerAnalyzer = "repoIndexerAnalyzer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 5 + repoIndexerLatestVersion = 6 ) // createBleveIndexer create a bleve repo indexer if one does not already exist @@ -138,7 +139,7 @@ func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) { "type": analyzer_custom.Name, "char_filters": []string{}, "tokenizer": unicode.Name, - "token_filters": []string{unicodeNormalizeName, lowercase.Name}, + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, }); err != nil { return nil, err } diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index 952bddfb2..e3ef9af5b 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -15,6 +15,7 @@ import ( "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" @@ -27,7 +28,7 @@ import ( const ( issueIndexerAnalyzer = "issueIndexer" issueIndexerDocType = "issueIndexerDocType" - issueIndexerLatestVersion = 1 + issueIndexerLatestVersion = 2 ) // indexerID a bleve-compatible unique identifier for an integer id @@ -134,7 +135,7 @@ func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) { "type": custom.Name, "char_filters": []string{}, "tokenizer": unicode.Name, - "token_filters": []string{unicodeNormalizeName, lowercase.Name}, + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, }); err != nil { return nil, err }