Add auto truncate for embedding and rerank (#449)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
This commit is contained in:
lvliang-intel
2024-07-25 09:21:06 +08:00
committed by GitHub
parent 268d58d4a9
commit 8b60948c7b
3 changed files with 4 additions and 4 deletions

View File

@@ -46,7 +46,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: opea/embedding-tei:latest
container_name: embedding-tei-server

View File

@@ -80,7 +80,7 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: opea/embedding-tei:latest
container_name: embedding-tei-server

View File

@@ -39,7 +39,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
deploy:
resources:
reservations:
@@ -97,7 +97,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID}
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
deploy:
resources:
reservations: