Update vLLM parameter max-seq-len-to-capture (#1809)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Liang Lv
2025-04-15 14:27:12 +08:00
committed by GitHub
parent a222d1cfbb
commit 13dd27e6d5
15 changed files with 15 additions and 15 deletions

@@ -6,7 +6,7 @@ tgi:
 vllm:
   enabled: true
   LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
-  extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+  extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
 supervisor:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm

@@ -19,7 +19,7 @@ vllm:
   PT_HPU_ENABLE_LAZY_COLLECTIVES: true
   VLLM_SKIP_WARMUP: true
   shmSize: 16Gi
-  extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+  extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
 supervisor:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm
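For reference, the all-hyphen spelling matches the argument name vLLM's CLI parser advertises; the mixed "--max-seq_len-to-capture" form was a typo. With the corrected extraCmdArgs appended, each chart effectively starts the server as in the sketch below (the exact entrypoint invocation inside the image is an assumption):

    # Minimal sketch, assuming the image launches vLLM's OpenAI-compatible server
    python -m vllm.entrypoints.openai.api_server \
      --model meta-llama/Meta-Llama-3-8B-Instruct \
      --max-seq-len-to-capture 16384 \
      --enable-auto-tool-choice \
      --tool-call-parser llama3_json

Sequences with context length above --max-seq-len-to-capture fall back to eager-mode execution rather than captured CUDA graphs, so 16384 bounds the graph-capture range without limiting the model's maximum context.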