Update vLLM parameter max-seq-len-to-capture (#1809)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
This commit is contained in:
Liang Lv
2025-04-15 14:27:12 +08:00
committed by GitHub
parent a222d1cfbb
commit 13dd27e6d5
15 changed files with 15 additions and 15 deletions

View File

@@ -110,7 +110,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

View File

@@ -108,7 +108,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
llm-faqgen:
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
container_name: llm-faqgen-server

View File

@@ -139,7 +139,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-guardrails-server

View File

@@ -79,7 +79,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

View File

@@ -30,7 +30,7 @@ vllm:
"--tensor-parallel-size", "1",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048"
"--max-seq-len-to-capture", "2048"
]
# Reranking: second largest bottleneck when reranking is in use

View File

@@ -25,7 +25,7 @@ vllm:
"--tensor-parallel-size", "1",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048"
"--max-seq-len-to-capture", "2048"
]
# Reranking: second largest bottleneck when reranking is in use

View File

@@ -90,5 +90,5 @@ vllm:
"--tensor-parallel-size", "1",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048"
"--max-seq-len-to-capture", "2048"
]