fix: add some explanation for OceanBase parser selection (#26071)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
longbingljw
2025-09-23 17:06:06 +08:00
committed by GitHub
parent fb6ccccc3d
commit 24b4289d6c
3 changed files with 9 additions and 3 deletions

View File

@@ -40,8 +40,12 @@ class OceanBaseVectorConfig(BaseSettings):
OCEANBASE_FULLTEXT_PARSER: str | None = Field(
description=(
"Fulltext parser to use for text indexing. Options: 'japanese_ftparser' (Japanese), "
"'thai_ftparser' (Thai), 'ik' (Chinese). Default is 'ik'"
"Fulltext parser to use for text indexing. "
"Built-in options: 'ngram' (N-gram tokenizer for English/numbers), "
"'beng' (Basic English tokenizer), 'space' (Space-based tokenizer), "
"'ngram2' (Improved N-gram tokenizer), 'ik' (Chinese tokenizer). "
"External plugins (require installation): 'japanese_ftparser' (Japanese tokenizer), "
"'thai_ftparser' (Thai tokenizer). Default is 'ik'"
),
default="ik",
)

View File

@@ -123,7 +123,7 @@ class OceanBaseVector(BaseVector):
# Get parser from config or use default ik parser
parser_name = dify_config.OCEANBASE_FULLTEXT_PARSER or "ik"
allowed_parsers = ["ik", "japanese_ftparser", "thai_ftparser"]
allowed_parsers = ["ngram", "beng", "space", "ngram2", "ik", "japanese_ftparser", "thai_ftparser"]
if parser_name not in allowed_parsers:
raise ValueError(
f"Invalid OceanBase full-text parser: {parser_name}. "

View File

@@ -655,6 +655,8 @@ LINDORM_USING_UGC=True
LINDORM_QUERY_TIMEOUT=1
# OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase`
# Built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik`
# External fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser`
OCEANBASE_VECTOR_HOST=oceanbase
OCEANBASE_VECTOR_PORT=2881
OCEANBASE_VECTOR_USER=root@test