From c70b021689a38ad1187eb7a3708cb57972524c20 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 21 May 2025 12:58:07 +0800 Subject: [PATCH] Integrate CodeGen set_env to ut scripts. (#1976) Signed-off-by: ZePan110 Co-authored-by: Ying Hu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../docker_compose/amd/gpu/rocm/set_env.sh | 8 +- .../amd/gpu/rocm/set_env_vllm.sh | 8 +- .../docker_compose/intel/cpu/xeon/README.md | 113 ++++++++---------- .../docker_compose/intel/hpu/gaudi/README.md | 81 ++++++------- CodeGen/docker_compose/intel/set_env.sh | 51 ++++++++ CodeGen/docker_compose/set_env.sh | 50 -------- CodeGen/tests/README.md | 33 +++++ CodeGen/tests/test_compose_on_gaudi.sh | 34 +----- CodeGen/tests/test_compose_on_rocm.sh | 13 +- CodeGen/tests/test_compose_on_xeon.sh | 30 +---- CodeGen/tests/test_compose_vllm_on_rocm.sh | 13 +- 11 files changed, 180 insertions(+), 254 deletions(-) create mode 100644 CodeGen/docker_compose/intel/set_env.sh delete mode 100644 CodeGen/docker_compose/set_env.sh create mode 100644 CodeGen/tests/README.md diff --git a/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh b/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh index 117f81667..afaa29b34 100644 --- a/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh +++ b/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh @@ -5,8 +5,8 @@ # SPDX-License-Identifier: Apache-2.0 ### The IP address or domain name of the server on which the application is running -export HOST_IP='' -export EXTERNAL_HOST_IP='' +export HOST_IP=${ip_address} +export EXTERNAL_HOST_IP=${ip_address} ### The port of the TGI service. On this port, the TGI service will accept connections export CODEGEN_TGI_SERVICE_PORT=8028 @@ -27,7 +27,7 @@ export CODEGEN_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}" export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP} ### The port for CodeGen backend service -export CODEGEN_BACKEND_SERVICE_PORT=18150 +export CODEGEN_BACKEND_SERVICE_PORT=7778 ### The URL of CodeGen backend service, used by the frontend service export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen" @@ -36,4 +36,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP} ### The CodeGen service UI port -export CODEGEN_UI_SERVICE_PORT=18151 +export CODEGEN_UI_SERVICE_PORT=5173 diff --git a/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 52d69da19..475191539 100644 --- a/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -5,8 +5,8 @@ # SPDX-License-Identifier: Apache-2.0 ### The IP address or domain name of the server on which the application is running -export HOST_IP='' -export EXTERNAL_HOST_IP='' +export HOST_IP=${ip_address} +export EXTERNAL_HOST_IP=${ip_address} ### The port of the vLLM service. 
On this port, the vLLM service will accept connections
export CODEGEN_VLLM_SERVICE_PORT=8028

@@ -25,7 +25,7 @@ export CODEGEN_LLM_SERVICE_PORT=9000
 export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}

 ### The port for CodeGen backend service
-export CODEGEN_BACKEND_SERVICE_PORT=18150
+export CODEGEN_BACKEND_SERVICE_PORT=7778

 ### The URL of CodeGen backend service, used by the frontend service
 export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -34,4 +34,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}

 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md
index 24835c859..57eda8f82 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -6,22 +6,10 @@ This README provides instructions for deploying the CodeGen application using Do
 - [Overview](#overview)
 - [Prerequisites](#prerequisites)
-- [Quick Start](#quick-start)
-- [Available Deployment Options](#available-deployment-options)
-  - [Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)](#default-vllm-based-deployment---profile-codegen-xeon-vllm)
-  - [TGI-based Deployment (`--profile codegen-xeon-tgi`)](#tgi-based-deployment---profile-codegen-xeon-tgi)
-- [Configuration Parameters](#configuration-parameters)
-  - [Environment Variables](#environment-variables)
-  - [Compose Profiles](#compose-profiles)
+- [Quick Start Deployment](#quick-start-deployment)
 - [Building Custom Images (Optional)](#building-custom-images-optional)
 - [Validate Services](#validate-services)
-  - [Check Container Status](#check-container-status)
-  - [Run Validation Script/Commands](#run-validation-scriptcommands)
 - [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
-  - [Gradio UI (Default)](#gradio-ui-default)
-  - [Svelte UI (Optional)](#svelte-ui-optional)
-  - [React UI (Optional)](#react-ui-optional)
-  - [VS Code Extension (Optional)](#vs-code-extension-optional)
 - [Troubleshooting](#troubleshooting)
 - [Stopping the Application](#stopping-the-application)
 - [Next Steps](#next-steps)
@@ -43,38 +31,37 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
   cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon
   ```

-## Quick Start
+## Quick Start Deployment

This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
1. **Configure Environment:**
   Set required environment variables in your shell:

-   ```bash
-   # Replace with your host's external IP address (do not use localhost or 127.0.0.1)
-   export host_ip="your_external_ip_address"
-   # Replace with your Hugging Face Hub API token
-   export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
+   ```bash
+   # Replace with your host's external IP address (do not use localhost or 127.0.0.1)
+   export HOST_IP="your_external_ip_address"
+   # Replace with your Hugging Face Hub API token
+   export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"

-   # Optional: Configure proxy if needed
-   # export http_proxy="your_http_proxy"
-   # export https_proxy="your_https_proxy"
-   # export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
-   source ../../../set_env.sh
-   ```
+   # Optional: Configure proxy if needed
+   # export http_proxy="your_http_proxy"
+   # export https_proxy="your_https_proxy"
+   # export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
+   source ../../set_env.sh
+   ```

-   _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
+   _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._

-   like
-
-   ```
-   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
-   ```
-
-   can be changed to small model if needed
-
-   ```
-   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-   ```
+   For instance, edit `set_env.sh` to change the LLM model:
+
+   ```
+   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
+   ```
+
+   It can be changed to another model if needed:
+
+   ```
+   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
+   ```

2. **Start Services (vLLM Profile):**

   ```bash
   docker compose --profile codegen-xeon-vllm up -d
   ```

3. **Validate:** Wait several minutes for models to download (especially the first time) and services to initialize. Check container logs (`docker compose logs -f <service_name>`) or proceed to the validation steps below.

-## Available Deployment Options
+### Available Deployment Options

The `compose.yaml` file uses Docker Compose profiles to select the LLM serving backend.

-### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)
+#### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)

- **Profile:** `codegen-xeon-vllm`
- **Description:** Uses vLLM optimized for Intel CPUs as the LLM serving engine. This is the default profile used in the Quick Start.
- **Services Deployed:** `codegen-vllm-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.

-### TGI-based Deployment (`--profile codegen-xeon-tgi`)
+#### TGI-based Deployment (`--profile codegen-xeon-tgi`)

- **Profile:** `codegen-xeon-tgi`
- **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine.
- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
- **To Run:**
  ```bash
- # Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
+ # Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
  docker compose --profile codegen-xeon-tgi up -d
  ```

-## Configuration Parameters
+### Configuration Parameters

-### Environment Variables
+#### Environment Variables

Key parameters are configured via environment variables set before running `docker compose up`.

-| Environment Variable | Description | Default (Set Externally) |
-| :--- | :--- | :--- |
-| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
-| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
-| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
-| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
-| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
-| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
-| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
-| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
-| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
-| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
+| Environment Variable | Description | Default (Set Externally) |
+| :--- | :--- | :--- |
+| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
+| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
+| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
+| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
+| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-vllm-server:9000/v1/chat/completions` or `http://codegen-tgi-server:9000/v1/chat/completions` |
+| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
+| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
+| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
+| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
+| `http_proxy` / `https_proxy` / `no_proxy` | Network proxy settings (if required). | `""` |

Most of these parameters are set in `set_env.sh`; you can either modify this file or override individual variables by exporting them.

@@ -131,7 +118,7 @@ Most of these parameters are in `set_env.sh`, you can either modify this file or

```
source CodeGen/docker_compose/intel/set_env.sh
```

-### Compose Profiles
+#### Compose Profiles

Docker Compose profiles (`codegen-xeon-vllm`, `codegen-xeon-tgi`) control which LLM serving backend (vLLM or TGI) and its associated dependencies are started. Only one profile should typically be active.

@@ -161,23 +148,23 @@ Check logs for specific services: `docker compose logs <service_name>`

### Run Validation Script/Commands

-Use `curl` commands to test the main service endpoints. Ensure `host_ip` is correctly set in your environment.
+Use `curl` commands to test the main service endpoints. Ensure `HOST_IP` is correctly set in your environment.

1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**

   ```bash
   # This command structure targets the OpenAI-compatible vLLM endpoint
-  curl http://${host_ip}:9000/v1/chat/completions \
+  curl http://${HOST_IP}:9000/v1/chat/completions \
     -X POST \
     -H 'Content-Type: application/json' \
-    -d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
+    -d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
   ```

   - **Expected Output:** A JSON response with generated code in `choices[0].message.content`.

2. **Validate CodeGen Gateway (MegaService on default port 7778):**

   ```bash
-  curl http://${host_ip}:7778/v1/codegen \
+  curl http://${HOST_IP}:7778/v1/codegen \
     -H "Content-Type: application/json" \
     -d '{"messages": "Write a Python function that adds two numbers."}'
   ```

@@ -190,7 +177,7 @@ Multiple UI options can be configured via the `compose.yaml`.

### Gradio UI (Default)

Access the default Gradio UI by navigating to:
-`http://{host_ip}:5173`
+`http://{HOST_IP}:5173`
_(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_

![Gradio UI - Code Generation](../../../../assets/img/codegen_gradio_ui_main.png)

@@ -200,7 +187,7 @@ _(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_

1. Modify `compose.yaml`: Comment out the `codegen-gradio-ui-server` service and uncomment/add the `codegen-xeon-ui-server` (Svelte) service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`).
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
-3. Access: `http://{host_ip}:5173` (or the host port you mapped).
+3. Access: `http://{HOST_IP}:5173` (or the host port you mapped).

![Svelte UI Init](../../../../assets/img/codeGen_ui_init.jpg)

### React UI (Optional)

1. Modify `compose.yaml`: Comment out the default UI service and uncomment/add the `codegen-xeon-react-ui-server` definition, ensuring correct port mapping (e.g., `"- 5174:80"`).
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
-3.
Access: `http://{host_ip}:5174` (or the host port you mapped). +3. Access: `http://{HOST_IP}:5174` (or the host port you mapped). ![React UI](../../../../assets/img/codegen_react.png) @@ -218,7 +205,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e 1. **Install:** Find and install `Neural Copilot` from the VS Code Marketplace. ![Install Copilot](../../../../assets/img/codegen_copilot.png) -2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${host_ip}:7778/v1/codegen` (use the correct port if changed). +2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${HOST_IP}:7778/v1/codegen` (use the correct port if changed). ![Configure Endpoint](../../../../assets/img/codegen_endpoint.png) 3. **Usage:** - **Inline Suggestion:** Type a comment describing the code you want (e.g., `# Python function to read a file`) and wait for suggestions. @@ -229,7 +216,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e ## Troubleshooting - **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access. -- **Connection Errors:** Verify `host_ip` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `host_ip` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`). +- **Connection Errors:** Verify `HOST_IP` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `HOST_IP` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`). - **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`. - **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed. 
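The Xeon instructions above standardize on `HOST_IP` and the relocated `intel/set_env.sh`. As a quick end-to-end check after `docker compose up`, a polling script along the following lines can be used; this is a sketch under those assumptions, not part of the patch. `BACKEND_SERVICE_ENDPOINT` is exported by `set_env.sh` and defaults to `http://${HOST_IP}:7778/v1/codegen`.

```bash
#!/usr/bin/env bash
# Hypothetical smoke test for the Xeon deployment; assumes it is run from
# the GenAIExamples checkout root after the stack has been started.
cd CodeGen/docker_compose/intel
source set_env.sh # exports HOST_IP, BACKEND_SERVICE_ENDPOINT, etc.
cd - > /dev/null

for _ in $(seq 1 30); do
  # The gateway answers with JSON once the megaservice and its LLM backend are ready.
  if curl -sf "${BACKEND_SERVICE_ENDPOINT}" \
       -H "Content-Type: application/json" \
       -d '{"messages": "Write a Python function that adds two numbers."}' > /dev/null; then
    echo "CodeGen gateway is up."
    exit 0
  fi
  sleep 10
done
echo "CodeGen gateway did not respond in time." >&2
exit 1
```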
diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md
index 75366d349..e94ccbf30 100644
--- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md
@@ -6,23 +6,10 @@ This README provides instructions for deploying the CodeGen application using Do
 - [Overview](#overview)
 - [Prerequisites](#prerequisites)
-- [Quick Start](#quick-start)
-- [Available Deployment Options](#available-deployment-options)
-  - [Default: vLLM-based Deployment (`--profile codegen-gaudi-vllm`)](#default-vllm-based-deployment---profile-codegen-gaudi-vllm)
-  - [TGI-based Deployment (`--profile codegen-gaudi-tgi`)](#tgi-based-deployment---profile-codegen-gaudi-tgi)
-- [Configuration Parameters](#configuration-parameters)
-  - [Environment Variables](#environment-variables)
-  - [Compose Profiles](#compose-profiles)
-  - [Docker Compose Gaudi Configuration](#docker-compose-gaudi-configuration)
+- [Quick Start Deployment](#quick-start-deployment)
 - [Building Custom Images (Optional)](#building-custom-images-optional)
 - [Validate Services](#validate-services)
-  - [Check Container Status](#check-container-status)
-  - [Run Validation Script/Commands](#run-validation-scriptcommands)
 - [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
-  - [Gradio UI (Default)](#gradio-ui-default)
-  - [Svelte UI (Optional)](#svelte-ui-optional)
-  - [React UI (Optional)](#react-ui-optional)
-  - [VS Code Extension (Optional)](#vs-code-extension-optional)
 - [Troubleshooting](#troubleshooting)
 - [Stopping the Application](#stopping-the-application)
 - [Next Steps](#next-steps)
@@ -44,7 +31,7 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
   cd GenAIExamples/CodeGen/docker_compose/intel/hpu/gaudi
   ```

-## Quick Start
+## Quick Start Deployment

This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).

@@ -53,30 +40,30 @@ This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).

   ```bash
   # Replace with your host's external IP address (do not use localhost or 127.0.0.1)
-  export host_ip="your_external_ip_address"
+  export HOST_IP="your_external_ip_address"
   # Replace with your Hugging Face Hub API token
   export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"

   # Optional: Configure proxy if needed
   # export http_proxy="your_http_proxy"
   # export https_proxy="your_https_proxy"
-  # export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
-  source ../../../set_env.sh
+  # export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
+  source ../../set_env.sh
   ```

   _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._

-  like
-
-  ```
-  export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
-  ```
-
-  can be changed to small model if needed
+  For instance, edit `set_env.sh` to change the LLM model:

   ```
   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
   ```

+  It can be changed to another model if needed:
+
+  ```
+  export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
+  ```
+
2. **Start Services (vLLM Profile):**

   ```bash
@@ -105,7 +92,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
- **Other Services:** Same CPU-based services as the vLLM profile.
- **To Run:**
  ```bash
- # Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
+ # Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
  docker compose --profile codegen-gaudi-tgi up -d
  ```

@@ -115,18 +102,18 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b

Key parameters are configured via environment variables set before running `docker compose up`.

-| Environment Variable | Description | Default (Set Externally) |
-| :--- | :--- | :--- |
-| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
-| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
-| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-32B-Instruct` |
-| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
-| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
-| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
-| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
-| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
-| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
-| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
+| Environment Variable | Description | Default (Set Externally) |
+| :--- | :--- | :--- |
+| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
+| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
+| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
+| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
+| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `llm-codegen-vllm-server`). Configured in `compose.yaml`. | `http://codegen-vllm-server:9000/v1/chat/completions` or `http://codegen-tgi-server:9000/v1/chat/completions` |
+| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
+| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
+| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
+| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
+| `http_proxy` / `https_proxy` / `no_proxy` | Network proxy settings (if required). | `""` |

Most of these parameters are set in `set_env.sh`; you can either modify this file or override individual variables by exporting them.

@@ -181,21 +168,21 @@ Check logs: `docker compose logs <service_name>`. Pay attention to `vllm-gaudi-s

### Run Validation Script/Commands

-Use `curl` commands targeting the main service endpoints. Ensure `host_ip` is correctly set.
+Use `curl` commands targeting the main service endpoints. Ensure `HOST_IP` is correctly set.

1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**

   ```bash
   # This command structure targets the OpenAI-compatible vLLM endpoint
-  curl http://${host_ip}:9000/v1/chat/completions \
+  curl http://${HOST_IP}:9000/v1/chat/completions \
     -X POST \
     -H 'Content-Type: application/json' \
-    -d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
+    -d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
   ```

2. **Validate CodeGen Gateway (MegaService, default host port 7778):**

   ```bash
-  curl http://${host_ip}:7778/v1/codegen \
+  curl http://${HOST_IP}:7778/v1/codegen \
     -H "Content-Type: application/json" \
     -d '{"messages": "Implement a sorting algorithm in Python."}'
   ```

@@ -208,7 +195,7 @@ UI options are similar to the Xeon deployment.

### Gradio UI (Default)

Access the default Gradio UI:
-`http://{host_ip}:5173`
+`http://{HOST_IP}:5173`
_(Port `5173` is the default host mapping)_

![Gradio UI](../../../../assets/img/codegen_gradio_ui_main.png)

@@ -217,17 +204,17 @@ _(Port `5173` is the default host mapping)_

1. Modify `compose.yaml`: Swap Gradio service for Svelte (`codegen-gaudi-ui-server`), check port map (e.g., `5173:5173`).
2. Restart: `docker compose --profile <profile_name> up -d`
-3. Access: `http://{host_ip}:5173`
+3. Access: `http://{HOST_IP}:5173`

### React UI (Optional)

1. Modify `compose.yaml`: Swap Gradio service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`).
2. Restart: `docker compose --profile <profile_name> up -d`
-3. Access: `http://{host_ip}:5174`
+3. Access: `http://{HOST_IP}:5174`

### VS Code Extension (Optional)

-Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${host_ip}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).
+Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${HOST_IP}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).

## Troubleshooting

- Verify `runtime: habana` and volume mounts in `compose.yaml`.
- Gaudi initialization can take significant time and memory. Monitor resource usage.
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`, internet access, proxy settings. Check LLM service logs.
-- **Connection Errors:** Verify `host_ip`, ports, and proxy settings.
+- **Connection Errors:** Verify `HOST_IP`, ports, and proxy settings.
Use `docker ps` and check service logs. ## Stopping the Application diff --git a/CodeGen/docker_compose/intel/set_env.sh b/CodeGen/docker_compose/intel/set_env.sh new file mode 100644 index 000000000..ea48c198b --- /dev/null +++ b/CodeGen/docker_compose/intel/set_env.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +pushd "../../" > /dev/null +source .set_env.sh +popd > /dev/null + +export HOST_IP=$(hostname -I | awk '{print $1}') +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then + echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" +fi + +if [ -z "${HOST_IP}" ]; then + echo "Error: HOST_IP is not set. Please set HOST_IP first." +fi + +export no_proxy=${no_proxy},${HOST_IP} +export http_proxy=${http_proxy} +export https_proxy=${https_proxy} + +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" +export LLM_SERVICE_PORT=9000 +export LLM_ENDPOINT="http://${HOST_IP}:8028" +export LLM_SERVICE_HOST_IP=${HOST_IP} +export TGI_LLM_ENDPOINT="http://${HOST_IP}:8028" + +export MEGA_SERVICE_PORT=7778 +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:7778/v1/codegen" + +export REDIS_DB_PORT=6379 +export REDIS_INSIGHTS_PORT=8001 +export REDIS_RETRIEVER_PORT=7000 +export REDIS_URL="redis://${HOST_IP}:${REDIS_DB_PORT}" +export RETRIEVAL_SERVICE_HOST_IP=${HOST_IP} +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" +export INDEX_NAME="CodeGen" + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export EMBEDDER_PORT=6000 +export TEI_EMBEDDER_PORT=8090 +export TEI_EMBEDDING_HOST_IP=${HOST_IP} +export TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${TEI_EMBEDDER_PORT}" + +export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${HOST_IP}:${DATAPREP_REDIS_PORT}/v1/dataprep" +export LOGFLAG=false +export MODEL_CACHE=${model_cache:-"./data"} +export NUM_CARDS=1 diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh deleted file mode 100644 index bde459add..000000000 --- a/CodeGen/docker_compose/set_env.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -pushd "../../" > /dev/null -source .set_env.sh -popd > /dev/null - -export host_ip=$(hostname -I | awk '{print $1}') -if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" -fi - -if [ -z "${host_ip}" ]; then - echo "Error: host_ip is not set. Please set host_ip first." 
-fi - -export no_proxy=${no_proxy},${host_ip} -export http_proxy=${http_proxy} -export https_proxy=${https_proxy} - -export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" -export LLM_SERVICE_PORT=9000 -export LLM_ENDPOINT="http://${host_ip}:8028" -export LLM_SERVICE_HOST_IP=${host_ip} -export TGI_LLM_ENDPOINT="http://${host_ip}:8028" - -export MEGA_SERVICE_PORT=7778 -export MEGA_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" - -export REDIS_DB_PORT=6379 -export REDIS_INSIGHTS_PORT=8001 -export REDIS_RETRIEVER_PORT=7000 -export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" -export RETRIEVAL_SERVICE_HOST_IP=${host_ip} -export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" -export INDEX_NAME="CodeGen" - -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export EMBEDDER_PORT=6000 -export TEI_EMBEDDER_PORT=8090 -export TEI_EMBEDDING_HOST_IP=${host_ip} -export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" - -export DATAPREP_REDIS_PORT=6007 -export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" -export LOGFLAG=false -export MODEL_CACHE="./data" -export NUM_CARDS=1 diff --git a/CodeGen/tests/README.md b/CodeGen/tests/README.md new file mode 100644 index 000000000..4909899be --- /dev/null +++ b/CodeGen/tests/README.md @@ -0,0 +1,33 @@ +# CodeGen E2E test scripts + +## Set the required environment variable + +```bash +export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +``` + +## Run test + +On Intel Xeon with TGI: + +```bash +bash test_compose_on_xeon.sh +``` + +On Intel Gaudi with TGI: + +```bash +bash test_compose_on_gaudi.sh +``` + +On AMD ROCm with TGI: + +```bash +bash test_compose_on_rocm.sh +``` + +On AMD ROCm with vLLM: + +```bash +bash test_compose_vllm_on_rocm.sh +``` diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 413ce5380..87acfbaa5 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -10,21 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} export TAG=${IMAGE_TAG} export MODEL_CACHE=${model_cache:-"./data"} -export REDIS_DB_PORT=6379 -export REDIS_INSIGHTS_PORT=8001 -export REDIS_RETRIEVER_PORT=7000 -export EMBEDDER_PORT=6000 -export TEI_EMBEDDER_PORT=8090 -export DATAPREP_REDIS_PORT=6007 WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') - -export http_proxy=${http_proxy} -export https_proxy=${https_proxy} -export no_proxy=${no_proxy},${ip_address} - +source $WORKPATH/docker_compose/intel/set_env.sh function build_docker_images() { opea_branch=${opea_branch:-"main"} @@ -54,28 +44,6 @@ function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" - export LLM_ENDPOINT="http://${ip_address}:8028" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MEGA_SERVICE_PORT=7778 - export MEGA_SERVICE_HOST_IP=${ip_address} - export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen" - export NUM_CARDS=1 - export host_ip=${ip_address} - - export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" - export RETRIEVAL_SERVICE_HOST_IP=${host_ip} - export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" - export INDEX_NAME="CodeGen" - - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export TEI_EMBEDDING_HOST_IP=${host_ip} - export 
TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" - export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" - - export INDEX_NAME="CodeGen" - # Start Docker Containers docker compose --profile ${compose_profile} up -d | tee ${LOG_PATH}/start_services_with_compose.log diff --git a/CodeGen/tests/test_compose_on_rocm.sh b/CodeGen/tests/test_compose_on_rocm.sh index 94f006e35..173a0538f 100644 --- a/CodeGen/tests/test_compose_on_rocm.sh +++ b/CodeGen/tests/test_compose_on_rocm.sh @@ -35,18 +35,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ - - export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" - export CODEGEN_TGI_SERVICE_PORT=8028 - export CODEGEN_TGI_LLM_ENDPOINT="http://${ip_address}:${CODEGEN_TGI_SERVICE_PORT}" - export CODEGEN_LLM_SERVICE_PORT=9000 - export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address} - export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address} - export CODEGEN_BACKEND_SERVICE_PORT=7778 - export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen" - export CODEGEN_UI_SERVICE_PORT=5173 - export HOST_IP=${ip_address} + source set_env.sh sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 4aaa180ec..a50e5f0a7 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -10,20 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} export TAG=${IMAGE_TAG} export MODEL_CACHE=${model_cache:-"./data"} -export REDIS_DB_PORT=6379 -export REDIS_INSIGHTS_PORT=8001 -export REDIS_RETRIEVER_PORT=7000 -export EMBEDDER_PORT=6000 -export TEI_EMBEDDER_PORT=8090 -export DATAPREP_REDIS_PORT=6007 WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') - -export http_proxy=${http_proxy} -export https_proxy=${https_proxy} -export no_proxy=${no_proxy},${ip_address} +source $WORKPATH/docker_compose/intel/set_env.sh function build_docker_images() { opea_branch=${opea_branch:-"main"} @@ -56,25 +47,6 @@ function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" - export LLM_ENDPOINT="http://${ip_address}:8028" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MEGA_SERVICE_PORT=7778 - export MEGA_SERVICE_HOST_IP=${ip_address} - export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen" - export host_ip=${ip_address} - - export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" - export RETRIEVAL_SERVICE_HOST_IP=${host_ip} - export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" - export INDEX_NAME="CodeGen" - - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export TEI_EMBEDDING_HOST_IP=${host_ip} - export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" - export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" - # Start Docker Containers docker compose --profile ${compose_profile} up -d > ${LOG_PATH}/start_services_with_compose.log diff --git a/CodeGen/tests/test_compose_vllm_on_rocm.sh b/CodeGen/tests/test_compose_vllm_on_rocm.sh index 1d78f2a0d..33fef0b27 100644 --- a/CodeGen/tests/test_compose_vllm_on_rocm.sh +++ b/CodeGen/tests/test_compose_vllm_on_rocm.sh @@ -34,18 +34,7 @@ function 
build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ - - export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" - export CODEGEN_VLLM_SERVICE_PORT=8028 - export CODEGEN_VLLM_ENDPOINT="http://${ip_address}:${CODEGEN_VLLM_SERVICE_PORT}" - export CODEGEN_LLM_SERVICE_PORT=9000 - export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address} - export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address} - export CODEGEN_BACKEND_SERVICE_PORT=7778 - export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen" - export CODEGEN_UI_SERVICE_PORT=5173 - export HOST_IP=${ip_address} + source set_env_vllm.sh sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
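With the per-script exports replaced by `source set_env.sh` / `source set_env_vllm.sh`, running one of these E2E tests locally reduces to the pattern below. This is a sketch, not CI configuration: the scripts derive `ip_address` themselves and source the matching env file, so only the Hugging Face token must be provided up front (`IMAGE_REPO`/`IMAGE_TAG` are optional overrides used by CI).

```bash
# Sketch: local run of the updated ROCm vLLM E2E test.
# The script resolves ip_address itself and sources set_env_vllm.sh,
# so only the Hugging Face token is required beforehand.
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
cd GenAIExamples/CodeGen/tests
bash test_compose_vllm_on_rocm.sh
```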