Update vLLM parameter max-seq-len-to-capture (#1809)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Liang Lv
2025-04-15 14:27:12 +08:00
committed by GitHub
parent a222d1cfbb
commit 13dd27e6d5
15 changed files with 15 additions and 15 deletions

@@ -6,7 +6,7 @@ tgi:
 vllm:
   enabled: true
   LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
-  extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+  extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
 supervisor:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm

@@ -19,7 +19,7 @@ vllm:
   PT_HPU_ENABLE_LAZY_COLLECTIVES: true
   VLLM_SKIP_WARMUP: true
   shmSize: 16Gi
-  extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+  extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
 supervisor:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm
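For reference, the all-hyphen spelling matches the argument name vLLM's CLI parser advertises; the mixed "--max-seq_len-to-capture" form was a typo. With the corrected extraCmdArgs appended, each chart effectively starts the server as in the sketch below (the exact entrypoint invocation inside the image is an assumption):

    # Minimal sketch, assuming the image launches vLLM's OpenAI-compatible server
    python -m vllm.entrypoints.openai.api_server \
      --model meta-llama/Meta-Llama-3-8B-Instruct \
      --max-seq-len-to-capture 16384 \
      --enable-auto-tool-choice \
      --tool-call-parser llama3_json

Sequences with context length above --max-seq-len-to-capture fall back to eager-mode execution rather than captured CUDA graphs, so 16384 bounds the graph-capture range without limiting the model's maximum context.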