# Compare commits

9 commits

| Author | SHA1 | Date |
|---|---|---|
| | 99b3338649 | |
| | b02db2ad40 | |
| | dd232736e5 | |
| | a82caef698 | |
| | 2dc2ba1d5c | |
| | 15c62bfb7a | |
| | aebb69cd75 | |
| | 6287f7945a | |
| | d1b5113ce0 | |
**`set_env.sh` (AMD ROCm, TGI)**

```diff
@@ -5,8 +5,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 ### The IP address or domain name of the server on which the application is running
-export HOST_IP=''
-export EXTERNAL_HOST_IP=''
+export HOST_IP=${ip_address}
+export EXTERNAL_HOST_IP=${ip_address}
 
 ### The port of the TGI service. On this port, the TGI service will accept connections
 export CODEGEN_TGI_SERVICE_PORT=8028
@@ -36,4 +36,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}
 
 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
```
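The empty placeholder defaults are replaced with `${ip_address}`, so the file now assumes the caller defines `ip_address` before sourcing it (the vLLM variant below gets the same treatment). A minimal sketch of the expected calling pattern, reusing the derivation the test scripts further down use:

```bash
# Derive the host address the same way the test scripts do, then source the
# environment file so HOST_IP and EXTERNAL_HOST_IP pick up a real value.
ip_address=$(hostname -I | awk '{print $1}')
source set_env.sh
echo "HOST_IP=${HOST_IP} EXTERNAL_HOST_IP=${EXTERNAL_HOST_IP}"
```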
**`set_env.sh` (AMD ROCm, vLLM variant)**

```diff
@@ -5,8 +5,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 ### The IP address or domain name of the server on which the application is running
-export HOST_IP=''
-export EXTERNAL_HOST_IP=''
+export HOST_IP=${ip_address}
+export EXTERNAL_HOST_IP=${ip_address}
 
 ### The port of the vLLM service. On this port, the TGI service will accept connections
 export CODEGEN_VLLM_SERVICE_PORT=8028
@@ -25,7 +25,7 @@ export CODEGEN_LLM_SERVICE_PORT=9000
 export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}
 
 ### The port for CodeGen backend service
-export CODEGEN_BACKEND_SERVICE_PORT=18150
+export CODEGEN_BACKEND_SERVICE_PORT=7778
 
 ### The URL of CodeGen backend service, used by the frontend service
 export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -34,4 +34,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}
 
 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
```
**`compose.yaml` — vLLM service environment**

```diff
@@ -41,6 +41,7 @@ services:
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       host_ip: ${host_ip}
+      VLLM_CPU_KVCACHE_SPACE: 40
     healthcheck:
       test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
       interval: 10s
```

**`compose.yaml` — vLLM service environment (Gaudi)**

```diff
@@ -52,6 +52,7 @@ services:
       VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
       NUM_CARDS: ${NUM_CARDS:-1}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+      VLLM_CPU_KVCACHE_SPACE: 40
     healthcheck:
       test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
       interval: 10s
```
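Both compose hunks add the same `VLLM_CPU_KVCACHE_SPACE: 40` entry, the KV-cache budget in GiB that vLLM reserves on CPU. A quick hedged check that the value actually reaches the running container; the service name `codegen-vllm-service` is taken from the vLLM test below and may differ in your deployment:

```bash
# Confirm the new variable is visible inside the running vLLM container.
# "codegen-vllm-service" is assumed from the test script; adjust if needed.
docker exec codegen-vllm-service env | grep VLLM_CPU_KVCACHE_SPACE
# Expected: VLLM_CPU_KVCACHE_SPACE=40
```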
**`CodeGen/tests/README.md`** (new file, 33 lines)

```diff
@@ -0,0 +1,33 @@
+# CodeGen E2E test scripts
+
+## Set the required environment variable
+
+```bash
+export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+```
+
+## Run test
+
+On Intel Xeon with TGI:
+
+```bash
+bash test_compose_on_xeon.sh
+```
+
+On Intel Gaudi with TGI:
+
+```bash
+bash test_compose_on_gaudi.sh
+```
+
+On AMD ROCm with TGI:
+
+```bash
+bash test_compose_on_rocm.sh
+```
+
+On AMD ROCm with vLLM:
+
+```bash
+bash test_compose_vllm_on_rocm.sh
+```
```
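Putting the new README together, a typical end-to-end run on a Xeon host looks like this; the `cd` path assumes the usual GenAIExamples checkout layout:

```bash
# Set the required token, then run the Xeon E2E test.
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
cd CodeGen/tests   # path within the GenAIExamples checkout (assumption)
bash test_compose_on_xeon.sh
```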
**`test_compose_on_gaudi.sh`**

```diff
@@ -10,21 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
 export REGISTRY=${IMAGE_REPO}
 export TAG=${IMAGE_TAG}
 export MODEL_CACHE=${model_cache:-"./data"}
-export REDIS_DB_PORT=6379
-export REDIS_INSIGHTS_PORT=8001
-export REDIS_RETRIEVER_PORT=7000
-export EMBEDDER_PORT=6000
-export TEI_EMBEDDER_PORT=8090
-export DATAPREP_REDIS_PORT=6007
 
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
 
-export http_proxy=${http_proxy}
-export https_proxy=${https_proxy}
-export no_proxy=${no_proxy},${ip_address}
-
 function build_docker_images() {
     opea_branch=${opea_branch:-"main"}
     # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
@@ -58,29 +48,12 @@ function start_services() {
     local compose_profile="$1"
     local llm_container_name="$2"
 
-    cd $WORKPATH/docker_compose/intel/hpu/gaudi
-
-    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export LLM_ENDPOINT="http://${ip_address}:8028"
+    cd $WORKPATH/docker_compose
+    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export MEGA_SERVICE_PORT=7778
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export LLM_SERVICE_HOST_IP=${ip_address}
-    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
-    export NUM_CARDS=1
     export host_ip=${ip_address}
-
-    export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
-    export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
-    export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
-    export INDEX_NAME="CodeGen"
-
-    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-    export TEI_EMBEDDING_HOST_IP=${host_ip}
-    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
-    export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
-
-    export INDEX_NAME="CodeGen"
+    source set_env.sh
+    cd intel/hpu/gaudi
 
     # Start Docker Containers
     docker compose --profile ${compose_profile} up -d | tee ${LOG_PATH}/start_services_with_compose.log
```
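The refactor collapses the per-script export block into a single `source set_env.sh`, executed from `docker_compose/` so the shared file is found, before descending into the platform directory. The resulting startup sequence, shown in isolation (the profile name is a placeholder the caller supplies):

```bash
# New startup flow: shared env first, then the platform-specific compose dir.
cd "$WORKPATH/docker_compose"
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
export host_ip=${ip_address}
source set_env.sh
cd intel/hpu/gaudi
docker compose --profile "${compose_profile}" up -d
```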
```diff
@@ -144,7 +117,7 @@ function validate_microservices() {
         "completion_tokens" \
         "llm-service" \
         "${llm_container_name}" \
-        '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "def print_hello_world():"}], "max_tokens": 256}'
+        '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "def print_hello_world():"}], "max_tokens": 256}'
 
     # llm microservice
     validate_services \
```
```diff
@@ -176,7 +149,7 @@ function validate_megaservice() {
     # Curl the Mega Service with index_name and agents_flag
     validate_services \
         "${ip_address}:7778/v1/codegen" \
-        "" \
+        "fingerprint" \
         "mega-codegen" \
         "codegen-gaudi-backend-server" \
         '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}'
```
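The megaservice check now requires `fingerprint` in the response instead of accepting any output. A hand-run equivalent, assuming `validate_services` curls the URL and greps the body for the expected string:

```bash
# Manual spot-check of the mega service; mirrors the validate_services call.
curl -s "http://${ip_address}:7778/v1/codegen" \
  -H "Content-Type: application/json" \
  -d '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' \
  | grep -q "fingerprint" && echo "PASS" || echo "FAIL"
```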
```diff
@@ -225,8 +198,9 @@ function validate_gradio() {
 
 function stop_docker() {
     local docker_profile="$1"
-
-    cd $WORKPATH/docker_compose/intel/hpu/gaudi
+    cd $WORKPATH/docker_compose
+    source set_env.sh
+    cd intel/hpu/gaudi
     docker compose --profile ${docker_profile} down
 }
 
```
**`test_compose_on_rocm.sh`**

```diff
@@ -41,18 +41,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/amd/gpu/rocm/
-
-    export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export CODEGEN_TGI_SERVICE_PORT=8028
-    export CODEGEN_TGI_LLM_ENDPOINT="http://${ip_address}:${CODEGEN_TGI_SERVICE_PORT}"
-    export CODEGEN_LLM_SERVICE_PORT=9000
-    export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_BACKEND_SERVICE_PORT=7778
-    export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
-    export CODEGEN_UI_SERVICE_PORT=5173
-    export HOST_IP=${ip_address}
+    source set_env.sh
 
     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
 
```
**`test_compose_on_xeon.sh`**

```diff
@@ -10,21 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
 export REGISTRY=${IMAGE_REPO}
 export TAG=${IMAGE_TAG}
 export MODEL_CACHE=${model_cache:-"./data"}
-export REDIS_DB_PORT=6379
-export REDIS_INSIGHTS_PORT=8001
-export REDIS_RETRIEVER_PORT=7000
-export EMBEDDER_PORT=6000
-export TEI_EMBEDDER_PORT=8090
-export DATAPREP_REDIS_PORT=6007
 
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
 
-export http_proxy=${http_proxy}
-export https_proxy=${https_proxy}
-export no_proxy=${no_proxy},${ip_address}
-
 function build_docker_images() {
     opea_branch=${opea_branch:-"main"}
     # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
@@ -60,26 +50,11 @@ function start_services() {
     local compose_profile="$1"
     local llm_container_name="$2"
 
-    cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
-    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export LLM_ENDPOINT="http://${ip_address}:8028"
+    cd $WORKPATH/docker_compose
+    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export MEGA_SERVICE_PORT=7778
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export LLM_SERVICE_HOST_IP=${ip_address}
-    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
-    export host_ip=${ip_address}
-
-    export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
-    export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
-    export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
-    export INDEX_NAME="CodeGen"
-
-    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-    export TEI_EMBEDDING_HOST_IP=${host_ip}
-    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
-    export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
+    source set_env.sh
+    cd intel/cpu/xeon/
 
     # Start Docker Containers
     docker compose --profile ${compose_profile} up -d > ${LOG_PATH}/start_services_with_compose.log
```
```diff
@@ -143,7 +118,7 @@ function validate_microservices() {
         "completion_tokens" \
         "llm-service" \
         "${llm_container_name}" \
-        '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
+        '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
 
     # llm microservice
     validate_services \
```
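Only the model name in the validation payload changes, tracking the switch to the 32B model in `start_services`. To hand-check the backing LLM endpoint with the same payload (assuming the serving engine exposes the OpenAI-compatible API on port 8028, as `LLM_ENDPOINT` suggests):

```bash
# Send the test payload straight to the LLM endpoint the suite targets.
# The /v1/chat/completions path is an assumption based on LLM_ENDPOINT.
curl -s "http://${ip_address}:8028/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
```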
```diff
@@ -175,7 +150,7 @@ function validate_megaservice() {
     # Curl the Mega Service with index_name and agents_flag
     validate_services \
         "${ip_address}:7778/v1/codegen" \
-        "" \
+        "fingerprint" \
         "mega-codegen" \
         "codegen-xeon-backend-server" \
         '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}'
```
```diff
@@ -225,7 +200,9 @@ function validate_gradio() {
 function stop_docker() {
     local docker_profile="$1"
 
-    cd $WORKPATH/docker_compose/intel/cpu/xeon/
+    cd $WORKPATH/docker_compose
+    source set_env.sh
+    cd intel/cpu/xeon/
     docker compose --profile ${docker_profile} down
 }
 
```
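`stop_docker` now sources `set_env.sh` before `docker compose down`, so profile and variable resolution match what `start_services` used. A hedged manual equivalent; the profile name is whatever you started the stack with:

```bash
# Manual teardown mirroring the new stop_docker flow.
cd "$WORKPATH/docker_compose"
source set_env.sh
cd intel/cpu/xeon/
docker compose --profile "${docker_profile}" down
```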
**`test_compose_vllm_on_rocm.sh`**

```diff
@@ -40,18 +40,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/amd/gpu/rocm/
-
-    export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export CODEGEN_VLLM_SERVICE_PORT=8028
-    export CODEGEN_VLLM_ENDPOINT="http://${ip_address}:${CODEGEN_VLLM_SERVICE_PORT}"
-    export CODEGEN_LLM_SERVICE_PORT=9000
-    export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_BACKEND_SERVICE_PORT=7778
-    export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
-    export CODEGEN_UI_SERVICE_PORT=5173
-    export HOST_IP=${ip_address}
+    source set_env.sh
 
     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
 
```
```diff
@@ -104,7 +93,7 @@ function validate_microservices() {
         "content" \
         "codegen-vllm-service" \
         "codegen-vllm-service" \
-        '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
+        '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
     sleep 10
     # llm microservice
     validate_services \
```
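Since the container being validated here is `codegen-vllm-service`, its compose healthcheck status can be watched while the test runs:

```bash
# Poll the healthcheck state of the vLLM container the test targets.
docker inspect --format '{{.State.Health.Status}}' codegen-vllm-service
# Prints "starting", "healthy", or "unhealthy".
```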