Update vLLM parameter max-seq-len-to-capture (#1809)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
This commit is contained in:
@@ -62,7 +62,7 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
-command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
|
||||
+command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
|
||||
audioqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-gaudi-backend-server
|
||||
|
||||
@@ -21,7 +21,7 @@ vllm:
|
||||
"--tensor-parallel-size", "1",
|
||||
"--block-size", "128",
|
||||
"--max-num-seqs", "256",
|
||||
-"--max-seq_len-to-capture", "2048"
|
||||
+"--max-seq-len-to-capture", "2048"
|
||||
]
|
||||
|
||||
whisper:
|
||||
|
||||
Reference in New Issue
Block a user