Fix VLLM_CPU_KVCACHE_SPACE and wrong model id in tests

Signed-off-by: Yao, Qing <qing.yao@intel.com>
Fix issue
2025-05-15 16:41:22 +08:00 · 2025-05-07 16:51:45 +08:00 · 2025-04-24 16:40:44 +08:00 · 2025-04-24 16:35:13 +08:00 · 2025-04-24 16:34:08 +08:00 · 2025-04-24 12:01:24 +08:00
9 changed files with 63 additions and 99 deletions
--- a/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh
@@ -5,8 +5,8 @@
 # SPDX-License-Identifier: Apache-2.0

 ### The IP address or domain name of the server on which the application is running
-export HOST_IP=''
-export EXTERNAL_HOST_IP=''
+export HOST_IP=${ip_address}
+export EXTERNAL_HOST_IP=${ip_address}

 ### The port of the TGI service. On this port, the TGI service will accept connections
 export CODEGEN_TGI_SERVICE_PORT=8028
@@ -36,4 +36,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}

 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
--- a/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -5,8 +5,8 @@
 # SPDX-License-Identifier: Apache-2.0

 ### The IP address or domain name of the server on which the application is running
-export HOST_IP=''
-export EXTERNAL_HOST_IP=''
+export HOST_IP=${ip_address}
+export EXTERNAL_HOST_IP=${ip_address}

 ### The port of the vLLM service. On this port, the vLLM service will accept connections
 export CODEGEN_VLLM_SERVICE_PORT=8028
@@ -25,7 +25,7 @@ export CODEGEN_LLM_SERVICE_PORT=9000
 export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}

 ### The port for CodeGen backend service
-export CODEGEN_BACKEND_SERVICE_PORT=18150
+export CODEGEN_BACKEND_SERVICE_PORT=7778

 ### The URL of CodeGen backend service, used by the frontend service
 export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -34,4 +34,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}

 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -41,6 +41,7 @@ services:
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
+      VLLM_CPU_KVCACHE_SPACE: 40
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -52,6 +52,7 @@ services:
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
      NUM_CARDS: ${NUM_CARDS:-1}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      VLLM_CPU_KVCACHE_SPACE: 40
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
--- a/CodeGen/tests/README.md
+++ b/CodeGen/tests/README.md
@@ -0,0 +1,33 @@
+# CodeGen E2E test scripts
+
+## Set the required environment variable
+
+```bash
+export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+```
+
+## Run the tests
+
+On Intel Xeon with TGI:
+
+```bash
+bash test_compose_on_xeon.sh
+```
+
+On Intel Gaudi with TGI:
+
+```bash
+bash test_compose_on_gaudi.sh
+```
+
+On AMD ROCm with TGI:
+
+```bash
+bash test_compose_on_rocm.sh
+```
+
+On AMD ROCm with vLLM:
+
+```bash
+bash test_compose_vllm_on_rocm.sh
+```
--- a/CodeGen/tests/test_compose_on_gaudi.sh
+++ b/CodeGen/tests/test_compose_on_gaudi.sh
@@ -10,21 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
 export REGISTRY=${IMAGE_REPO}
 export TAG=${IMAGE_TAG}
 export MODEL_CACHE=${model_cache:-"./data"}
-export REDIS_DB_PORT=6379
-export REDIS_INSIGHTS_PORT=8001
-export REDIS_RETRIEVER_PORT=7000
-export EMBEDDER_PORT=6000
-export TEI_EMBEDDER_PORT=8090
-export DATAPREP_REDIS_PORT=6007

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')

-export http_proxy=${http_proxy}
-export https_proxy=${https_proxy}
-export no_proxy=${no_proxy},${ip_address}
-
 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
@@ -58,29 +48,12 @@ function start_services() {
    local compose_profile="$1"
    local llm_container_name="$2"

-    cd $WORKPATH/docker_compose/intel/hpu/gaudi
-
-    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export LLM_ENDPOINT="http://${ip_address}:8028"
+    cd $WORKPATH/docker_compose
+    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export MEGA_SERVICE_PORT=7778
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export LLM_SERVICE_HOST_IP=${ip_address}
-    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
-    export NUM_CARDS=1
    export host_ip=${ip_address}
-
-    export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
-    export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
-    export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
-    export INDEX_NAME="CodeGen"
-
-    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-    export TEI_EMBEDDING_HOST_IP=${host_ip}
-    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
-    export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
-
-    export INDEX_NAME="CodeGen"
+    source set_env.sh
+    cd intel/hpu/gaudi

    # Start Docker Containers
    docker compose --profile ${compose_profile} up -d | tee ${LOG_PATH}/start_services_with_compose.log
@@ -144,7 +117,7 @@ function validate_microservices() {
        "completion_tokens" \
        "llm-service" \
        "${llm_container_name}" \
-        '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "def print_hello_world():"}], "max_tokens": 256}'
+        '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "def print_hello_world():"}], "max_tokens": 256}'

    # llm microservice
    validate_services \
@@ -176,7 +149,7 @@ function validate_megaservice() {
    # Curl the Mega Service with index_name and agents_flag
    validate_services \
        "${ip_address}:7778/v1/codegen" \
-        "" \
+        "fingerprint" \
        "mega-codegen" \
        "codegen-gaudi-backend-server" \
        '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}'
@@ -225,8 +198,9 @@ function validate_gradio() {

 function stop_docker() {
    local docker_profile="$1"
-
-    cd $WORKPATH/docker_compose/intel/hpu/gaudi
+    cd $WORKPATH/docker_compose
+    source set_env.sh
+    cd intel/hpu/gaudi
    docker compose --profile ${docker_profile} down
 }

--- a/CodeGen/tests/test_compose_on_rocm.sh
+++ b/CodeGen/tests/test_compose_on_rocm.sh
@@ -41,18 +41,7 @@ function build_docker_images() {

 function start_services() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
-
-    export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export CODEGEN_TGI_SERVICE_PORT=8028
-    export CODEGEN_TGI_LLM_ENDPOINT="http://${ip_address}:${CODEGEN_TGI_SERVICE_PORT}"
-    export CODEGEN_LLM_SERVICE_PORT=9000
-    export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_BACKEND_SERVICE_PORT=7778
-    export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
-    export CODEGEN_UI_SERVICE_PORT=5173
-    export HOST_IP=${ip_address}
+    source set_env.sh

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -10,21 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
 export REGISTRY=${IMAGE_REPO}
 export TAG=${IMAGE_TAG}
 export MODEL_CACHE=${model_cache:-"./data"}
-export REDIS_DB_PORT=6379
-export REDIS_INSIGHTS_PORT=8001
-export REDIS_RETRIEVER_PORT=7000
-export EMBEDDER_PORT=6000
-export TEI_EMBEDDER_PORT=8090
-export DATAPREP_REDIS_PORT=6007

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')

-export http_proxy=${http_proxy}
-export https_proxy=${https_proxy}
-export no_proxy=${no_proxy},${ip_address}
-
 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
@@ -60,26 +50,11 @@ function start_services() {
    local compose_profile="$1"
    local llm_container_name="$2"

-    cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
-    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export LLM_ENDPOINT="http://${ip_address}:8028"
+    cd $WORKPATH/docker_compose
+    export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export MEGA_SERVICE_PORT=7778
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export LLM_SERVICE_HOST_IP=${ip_address}
-    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
-    export host_ip=${ip_address}
-
-    export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
-    export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
-    export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
-    export INDEX_NAME="CodeGen"
-
-    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-    export TEI_EMBEDDING_HOST_IP=${host_ip}
-    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
-    export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
+    source set_env.sh
+    cd intel/cpu/xeon/

    # Start Docker Containers
    docker compose --profile ${compose_profile} up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -143,7 +118,7 @@ function validate_microservices() {
        "completion_tokens" \
        "llm-service" \
        "${llm_container_name}" \
-        '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
+        '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'

    # llm microservice
    validate_services \
@@ -175,7 +150,7 @@ function validate_megaservice() {
    # Curl the Mega Service with index_name and agents_flag
    validate_services \
        "${ip_address}:7778/v1/codegen" \
-        "" \
+        "fingerprint" \
        "mega-codegen" \
        "codegen-xeon-backend-server" \
        '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}'
@@ -225,7 +200,9 @@ function validate_gradio() {
 function stop_docker() {
    local docker_profile="$1"

-    cd $WORKPATH/docker_compose/intel/cpu/xeon/
+    cd $WORKPATH/docker_compose
+    source set_env.sh
+    cd intel/cpu/xeon/
    docker compose --profile ${docker_profile} down
 }

--- a/CodeGen/tests/test_compose_vllm_on_rocm.sh
+++ b/CodeGen/tests/test_compose_vllm_on_rocm.sh
@@ -40,18 +40,7 @@ function build_docker_images() {

 function start_services() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
-
-    export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-    export CODEGEN_VLLM_SERVICE_PORT=8028
-    export CODEGEN_VLLM_ENDPOINT="http://${ip_address}:${CODEGEN_VLLM_SERVICE_PORT}"
-    export CODEGEN_LLM_SERVICE_PORT=9000
-    export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
-    export CODEGEN_BACKEND_SERVICE_PORT=7778
-    export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
-    export CODEGEN_UI_SERVICE_PORT=5173
-    export HOST_IP=${ip_address}
+    source set_env.sh

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

@@ -104,7 +93,7 @@ function validate_microservices() {
        "content" \
        "codegen-vllm-service" \
        "codegen-vllm-service" \
-        '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
+        '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
    sleep 10
    # llm microservice
    validate_services \
Author	SHA1	Message	Date
Yao, Qing	99b3338649	Fix VLLM_CPU_KVCACHE_SPACE and wrong model id in tests Signed-off-by: Yao, Qing <qing.yao@intel.com>	2025-05-15 16:41:22 +08:00
ZePan110	b02db2ad40	Fix issue Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-05-07 16:51:45 +08:00
ZePan110	dd232736e5	test Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-04-24 16:40:44 +08:00
ZePan110	a82caef698	Merge branch 'enhance' of https://github.com/opea-project/GenAIExamples into enhance Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-04-24 16:35:13 +08:00
Ying Hu	2dc2ba1d5c	Merge branch 'main' into enhance	2025-04-24 16:34:08 +08:00
ZePan110	15c62bfb7a	Fix issue Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-04-24 12:01:24 +08:00
ZePan110	aebb69cd75	Integrate CodeGen set_env to ut scripts. Add README.md for CodeGen UT scripts. Optimization test check. Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-04-24 12:01:24 +08:00
ZePan110	6287f7945a	Fix issue Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-04-23 16:41:57 +08:00
ZePan110	d1b5113ce0	Integrate CodeGen set_env to ut scripts. Add README.md for CodeGen UT scripts. Optimization test check. Signed-off-by: ZePan110 <ze.pan@intel.com>	2025-04-23 15:52:35 +08:00