Update vLLM parameter max-seq-len-to-capture (#1809)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
This commit is contained in:
@@ -6,7 +6,7 @@ tgi:
|
||||
vllm:
|
||||
enabled: true
|
||||
LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
|
||||
extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
|
||||
|
||||
supervisor:
|
||||
llm_endpoint_url: http://{{ .Release.Name }}-vllm
|
||||
|
||||
@@ -19,7 +19,7 @@ vllm:
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
||||
VLLM_SKIP_WARMUP: true
|
||||
shmSize: 16Gi
|
||||
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
|
||||
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
|
||||
|
||||
supervisor:
|
||||
llm_endpoint_url: http://{{ .Release.Name }}-vllm
|
||||
|
||||
Reference in New Issue
Block a user