Fix VLLM_CPU_KVCACHE_SPACE and wrong model id in tests

Signed-off-by: Yao, Qing <qing.yao@intel.com>
This commit is contained in:
Yao, Qing
2025-05-15 16:41:22 +08:00
parent b02db2ad40
commit 99b3338649
5 changed files with 7 additions and 5 deletions

View File

@@ -41,6 +41,7 @@ services:
 https_proxy: ${https_proxy}
 HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
 host_ip: ${host_ip}
+VLLM_CPU_KVCACHE_SPACE: 40
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
 interval: 10s

View File

@@ -52,6 +52,7 @@ services:
 VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
 NUM_CARDS: ${NUM_CARDS:-1}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
+VLLM_CPU_KVCACHE_SPACE: 40
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
 interval: 10s

View File

@@ -49,7 +49,7 @@ function start_services() {
 local llm_container_name="$2"
 cd $WORKPATH/docker_compose
-export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export host_ip=${ip_address}
 source set_env.sh
@@ -117,7 +117,7 @@ function validate_microservices() {
 "completion_tokens" \
 "llm-service" \
 "${llm_container_name}" \
-'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "def print_hello_world():"}], "max_tokens": 256}'
+'{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "def print_hello_world():"}], "max_tokens": 256}'
 # llm microservice
 validate_services \

View File

@@ -51,7 +51,7 @@ function start_services() {
 local llm_container_name="$2"
 cd $WORKPATH/docker_compose
-export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 source set_env.sh
 cd intel/cpu/xeon/
@@ -118,7 +118,7 @@ function validate_microservices() {
 "completion_tokens" \
 "llm-service" \
 "${llm_container_name}" \
-'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
+'{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
 # llm microservice
 validate_services \

View File

@@ -93,7 +93,7 @@ function validate_microservices() {
 "content" \
 "codegen-vllm-service" \
 "codegen-vllm-service" \
-'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
+'{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
 sleep 10
 # llm microservice
 validate_services \