Compare commits

6 Commits

Author / SHA1 / Date, with the commit message below each entry:

ZePan110 / 52a6b22f3f / 2025-05-21 12:28:44 +08:00
    test
    Signed-off-by: ZePan110 <ze.pan@intel.com>

ZePan110 / c8259d47f9 / 2025-05-21 12:28:07 +08:00
    Revert "test"
    This reverts commit 2f9959f0a5.

ZePan110 / b980d6a34c / 2025-05-21 12:24:24 +08:00
    Fix issue
    Signed-off-by: ZePan110 <ze.pan@intel.com>

ZePan110 / 2f9959f0a5 / 2025-05-21 09:54:13 +08:00
    test
    Signed-off-by: ZePan110 <ze.pan@intel.com>

ZePan110 / 51b9d3b975 / 2025-05-20 15:32:16 +08:00
    Update .github/workflows/pr-code-scan.yml
    Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

ZePan110 / d9e7264a81 / 2025-05-20 15:24:36 +08:00
    Fix
    Signed-off-by: ZePan110 <ze.pan@intel.com>
30 changed files with 302 additions and 193 deletions

View File

@@ -3,7 +3,8 @@
# This workflow will only test GMC pipeline and will not install GMC any more
name: Single GMC E2e Test For CD Workflow Call
permissions:
  contents: read
on:
  workflow_call:
    inputs:

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Build and deploy GMC system on call and manual
permissions:
  contents: read
on:
  workflow_dispatch:
    inputs:

View File

@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Update Docker Hub Description
permissions:
  contents: read
on:
  schedule:
    - cron: "0 0 * * 0"

View File

@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Clean up container on manual event
permissions:
  contents: read
on:
  workflow_dispatch:
    inputs:

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Freeze OPEA images release tag
permissions:
  contents: read
on:
  workflow_dispatch:
    inputs:

View File

@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Build specific images on manual event
permissions:
  contents: read
on:
  workflow_dispatch:
    inputs:

View File

@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Clean up Local Registry on manual event
permissions:
  contents: read
on:
  workflow_dispatch:
    inputs:

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Trellix Command Line Scanner
permissions:
  contents: read
on:
  workflow_dispatch:
  schedule:

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Nightly build/publish latest docker images
permissions:
  contents: read
on:
  schedule:
    - cron: "30 14 * * 1-5" # UTC time

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: E2E Test with Helm Charts
permissions:
  contents: read
on:
  pull_request_target:
    branches: [main]

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Check Duplicated Images
permissions:
  contents: read
on:
  pull_request:
    branches: [main]

View File

@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
name: Code Scan
permissions:
  contents: read
  security-events: write
on:
  pull_request:
    branches: [main]

View File

@@ -3,6 +3,9 @@
name: E2E test with docker compose
permissions:
  contents: read
on:
  pull_request_target:
    branches: ["main", "*rc"]

View File

@@ -2,7 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
name: Compose file and dockerfile path checking
permissions:
  contents: read
on:
  pull_request:
    branches: [main]

View File

@@ -3,6 +3,9 @@
name: Check hyperlinks and relative path validity
permissions:
  contents: read
on:
  pull_request:
    branches: [main]
@@ -23,7 +26,7 @@ jobs:
      - name: Check the Validity of Hyperlinks
        run: |
          cd ${{github.workspace}}
          delay=1
          delay=15
          fail="FALSE"
          merged_commit=$(git log -1 --format='%H')
          changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
@@ -80,7 +83,7 @@ jobs:
      - name: Checking Relative Path Validity
        run: |
          cd ${{github.workspace}}
          delay=1
          delay=15
          fail="FALSE"
          repo_name=${{ github.event.pull_request.head.repo.full_name }}
          branch="https://github.com/$repo_name/blob/${{ github.event.pull_request.head.ref }}"

View File

@@ -3,6 +3,9 @@
# Test
name: Build latest images on push event
permissions:
  contents: read
on:
  push:
    branches: [ 'main' ]

View File

@@ -3,10 +3,12 @@
name: Check the validity of links in docker_images_list.
permissions:
  contents: read
on:
  push:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize]
jobs:
check-dockerfile-paths:

View File

@@ -8,6 +8,10 @@ on:
- "**/docker_compose/**/compose*.yaml"
name: Create an issue to GenAIInfra on push
permissions:
  contents: read
jobs:
job1:
name: Create issue

View File

@@ -3,13 +3,15 @@
name: Weekly test all examples on multiple HWs
permissions: read-all
on:
  schedule:
    - cron: "30 2 * * 6" # UTC time
  workflow_dispatch:
env:
  EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
  EXAMPLES: "CodeTrans" #${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
  NODES: "gaudi,xeon,rocm,arc"
jobs:

View File

@@ -5,8 +5,8 @@
# SPDX-License-Identifier: Apache-2.0
### The IP address or domain name of the server on which the application is running
export HOST_IP=${ip_address}
export EXTERNAL_HOST_IP=${ip_address}
export HOST_IP=''
export EXTERNAL_HOST_IP=''
### The port of the TGI service. On this port, the TGI service will accept connections
export CODEGEN_TGI_SERVICE_PORT=8028
@@ -27,7 +27,7 @@ export CODEGEN_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}"
export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}
### The port for CodeGen backend service
export CODEGEN_BACKEND_SERVICE_PORT=7778
export CODEGEN_BACKEND_SERVICE_PORT=18150
### The URL of CodeGen backend service, used by the frontend service
export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -36,4 +36,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}
### The CodeGen service UI port
export CODEGEN_UI_SERVICE_PORT=5173
export CODEGEN_UI_SERVICE_PORT=18151

View File

@@ -5,8 +5,8 @@
# SPDX-License-Identifier: Apache-2.0
### The IP address or domain name of the server on which the application is running
export HOST_IP=${ip_address}
export EXTERNAL_HOST_IP=${ip_address}
export HOST_IP=''
export EXTERNAL_HOST_IP=''
### The port of the vLLM service. On this port, the vLLM service will accept connections
export CODEGEN_VLLM_SERVICE_PORT=8028
@@ -25,7 +25,7 @@ export CODEGEN_LLM_SERVICE_PORT=9000
export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}
### The port for CodeGen backend service
export CODEGEN_BACKEND_SERVICE_PORT=7778
export CODEGEN_BACKEND_SERVICE_PORT=18150
### The URL of CodeGen backend service, used by the frontend service
export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -34,4 +34,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}
### The CodeGen service UI port
export CODEGEN_UI_SERVICE_PORT=5173
export CODEGEN_UI_SERVICE_PORT=18151

View File

@@ -6,10 +6,22 @@ This README provides instructions for deploying the CodeGen application using Do
- [Overview](#overview)
- [Prerequisites](#prerequisites)
- [Quick Start Deployment](#quick-start-deployment)
- [Quick Start](#quick-start)
- [Available Deployment Options](#available-deployment-options)
- [Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)](#default-vllm-based-deployment---profile-codegen-xeon-vllm)
- [TGI-based Deployment (`--profile codegen-xeon-tgi`)](#tgi-based-deployment---profile-codegen-xeon-tgi)
- [Configuration Parameters](#configuration-parameters)
- [Environment Variables](#environment-variables)
- [Compose Profiles](#compose-profiles)
- [Building Custom Images (Optional)](#building-custom-images-optional)
- [Validate Services](#validate-services)
- [Check Container Status](#check-container-status)
- [Run Validation Script/Commands](#run-validation-scriptcommands)
- [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
- [Gradio UI (Default)](#gradio-ui-default)
- [Svelte UI (Optional)](#svelte-ui-optional)
- [React UI (Optional)](#react-ui-optional)
- [VS Code Extension (Optional)](#vs-code-extension-optional)
- [Troubleshooting](#troubleshooting)
- [Stopping the Application](#stopping-the-application)
- [Next Steps](#next-steps)
@@ -31,37 +43,38 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon
```
## Quick Start Deployment
## Quick Start
This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
1. **Configure Environment:**
Set required environment variables in your shell:
```bash
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
export HOST_IP="your_external_ip_address"
# Replace with your Hugging Face Hub API token
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
```bash
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
export host_ip="your_external_ip_address"
# Replace with your Hugging Face Hub API token
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
# Optional: Configure proxy if needed
# export http_proxy="your_http_proxy"
# export https_proxy="your_https_proxy"
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
source ../../set_env.sh
```
# Optional: Configure proxy if needed
# export http_proxy="your_http_proxy"
# export https_proxy="your_https_proxy"
# export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
source ../../../set_env.sh
```
_Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
For instance, edit set_env.sh to change the LLM model. The default
```
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
```
can be changed to a larger model if needed:
```
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
```
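After changing the model and restarting the stack, one way to confirm which model the backend actually loaded is to query the serving endpoint (a sketch; assumes the vLLM profile, whose OpenAI-compatible API exposes `/v1/models`, and that `jq` is installed):
```bash
# The returned id should match the LLM_MODEL_ID you exported.
curl -s http://${host_ip}:9000/v1/models | jq -r '.data[].id'
```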
2. **Start Services (vLLM Profile):**
@@ -72,45 +85,45 @@ This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
3. **Validate:**
Wait several minutes for models to download (especially the first time) and services to initialize. Check container logs (`docker compose logs -f <service_name>`) or proceed to the validation steps below.
### Available Deployment Options
## Available Deployment Options
The `compose.yaml` file uses Docker Compose profiles to select the LLM serving backend.
#### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)
### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)
- **Profile:** `codegen-xeon-vllm`
- **Description:** Uses vLLM optimized for Intel CPUs as the LLM serving engine. This is the default profile used in the Quick Start.
- **Services Deployed:** `codegen-vllm-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
#### TGI-based Deployment (`--profile codegen-xeon-tgi`)
### TGI-based Deployment (`--profile codegen-xeon-tgi`)
- **Profile:** `codegen-xeon-tgi`
- **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine.
- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
- **To Run:**
```bash
# Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
# Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
docker compose --profile codegen-xeon-tgi up -d
```
### Configuration Parameters
## Configuration Parameters
#### Environment Variables
### Environment Variables
Key parameters are configured via environment variables set before running `docker compose up`.
| Environment Variable | Description | Default (Set Externally) |
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------- | ------------------------------------ |
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-vllm | tgi-server:9000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
| Environment Variable | Description | Default (Set Externally) |
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
Most of these parameters are defined in `set_env.sh`; you can either modify that file or override the variables by exporting them in your shell.
@@ -118,7 +131,7 @@ Most of these parameters are in `set_env.sh`, you can either modify this file or
source CodeGen/docker_compose/set_env.sh
```
#### Compose Profiles
### Compose Profiles
Docker Compose profiles (`codegen-xeon-vllm`, `codegen-xeon-tgi`) control which LLM serving backend (vLLM or TGI) and its associated dependencies are started. Only one profile should typically be active.
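A minimal sketch of switching between these profiles, assuming `host_ip` and `HUGGINGFACEHUB_API_TOKEN` are already exported as described above:
```bash
# Start the default vLLM-based stack.
docker compose --profile codegen-xeon-vllm up -d

# To switch to TGI, stop the vLLM profile first so that only one
# LLM serving backend is active, then bring up the TGI profile.
docker compose --profile codegen-xeon-vllm down
docker compose --profile codegen-xeon-tgi up -d
```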
@@ -148,23 +161,23 @@ Check logs for specific services: `docker compose logs <service_name>`
### Run Validation Script/Commands
Use `curl` commands to test the main service endpoints. Ensure `HOST_IP` is correctly set in your environment.
Use `curl` commands to test the main service endpoints. Ensure `host_ip` is correctly set in your environment.
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**
```bash
# This command structure targets the OpenAI-compatible vLLM endpoint
curl http://${HOST_IP}:9000/v1/chat/completions \
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-H 'Content-Type: application/json' \
-d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
-d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
```
- **Expected Output:** A JSON response with generated code in `choices[0].message.content`.
2. **Validate CodeGen Gateway (MegaService on default port 7778):**
```bash
curl http://${HOST_IP}:7778/v1/codegen \
curl http://${host_ip}:7778/v1/codegen \
-H "Content-Type: application/json" \
-d '{"messages": "Write a Python function that adds two numbers."}'
```
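To check only the generated text rather than the full JSON response, the output of either command can be filtered (a sketch; assumes `jq` is installed and the response follows the OpenAI chat-completions schema shown above):
```bash
# Print only the generated code; the model id should match your LLM_MODEL_ID.
curl -s http://${host_ip}:9000/v1/chat/completions \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}' \
  | jq -r '.choices[0].message.content'
```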
@@ -177,7 +190,7 @@ Multiple UI options can be configured via the `compose.yaml`.
### Gradio UI (Default)
Access the default Gradio UI by navigating to:
`http://{HOST_IP}:5173`
`http://{host_ip}:5173`
_(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_
![Gradio UI - Code Generation](../../../../assets/img/codegen_gradio_ui_main.png)
@@ -187,7 +200,7 @@ _(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_
1. Modify `compose.yaml`: Comment out the `codegen-gradio-ui-server` service and uncomment/add the `codegen-xeon-ui-server` (Svelte) service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`).
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
3. Access: `http://{HOST_IP}:5173` (or the host port you mapped).
3. Access: `http://{host_ip}:5173` (or the host port you mapped).
![Svelte UI Init](../../../../assets/img/codeGen_ui_init.jpg)
@@ -195,7 +208,7 @@ _(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_
1. Modify `compose.yaml`: Comment out the default UI service and uncomment/add the `codegen-xeon-react-ui-server` definition, ensuring correct port mapping (e.g., `"- 5174:80"`).
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
3. Access: `http://{HOST_IP}:5174` (or the host port you mapped).
3. Access: `http://{host_ip}:5174` (or the host port you mapped).
![React UI](../../../../assets/img/codegen_react.png)
@@ -205,7 +218,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e
1. **Install:** Find and install `Neural Copilot` from the VS Code Marketplace.
![Install Copilot](../../../../assets/img/codegen_copilot.png)
2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${HOST_IP}:7778/v1/codegen` (use the correct port if changed).
2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${host_ip}:7778/v1/codegen` (use the correct port if changed).
![Configure Endpoint](../../../../assets/img/codegen_endpoint.png)
3. **Usage:**
- **Inline Suggestion:** Type a comment describing the code you want (e.g., `# Python function to read a file`) and wait for suggestions.
@@ -216,7 +229,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e
## Troubleshooting
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access.
- **Connection Errors:** Verify `HOST_IP` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `HOST_IP` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`).
- **Connection Errors:** Verify `host_ip` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `host_ip` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`).
- **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`.
- **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed.
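For connection errors in particular, a few quick checks can narrow down which service is failing (a sketch using commands and container names from this guide):
```bash
# Confirm containers are up and ports are mapped as expected.
docker ps --format 'table {{.Names}}\t{{.Ports}}\t{{.Status}}'

# Tail the gateway's logs for errors reaching codegen-llm-server.
docker compose logs --tail=50 codegen-backend-server

# Probe the gateway endpoint directly.
curl -sf http://${host_ip}:7778/v1/codegen \
  -H 'Content-Type: application/json' \
  -d '{"messages": "ping"}' || echo "gateway not reachable"
```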

View File

@@ -6,10 +6,23 @@ This README provides instructions for deploying the CodeGen application using Do
- [Overview](#overview)
- [Prerequisites](#prerequisites)
- [Quick Start Deployment](#quick-start-deployment)
- [Quick Start](#quick-start)
- [Available Deployment Options](#available-deployment-options)
- [Default: vLLM-based Deployment (`--profile codegen-gaudi-vllm`)](#default-vllm-based-deployment---profile-codegen-gaudi-vllm)
- [TGI-based Deployment (`--profile codegen-gaudi-tgi`)](#tgi-based-deployment---profile-codegen-gaudi-tgi)
- [Configuration Parameters](#configuration-parameters)
- [Environment Variables](#environment-variables)
- [Compose Profiles](#compose-profiles)
- [Docker Compose Gaudi Configuration](#docker-compose-gaudi-configuration)
- [Building Custom Images (Optional)](#building-custom-images-optional)
- [Validate Services](#validate-services)
- [Check Container Status](#check-container-status)
- [Run Validation Script/Commands](#run-validation-scriptcommands)
- [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
- [Gradio UI (Default)](#gradio-ui-default)
- [Svelte UI (Optional)](#svelte-ui-optional)
- [React UI (Optional)](#react-ui-optional)
- [VS Code Extension (Optional)](#vs-code-extension-optional)
- [Troubleshooting](#troubleshooting)
- [Stopping the Application](#stopping-the-application)
- [Next Steps](#next-steps)
@@ -31,7 +44,7 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
cd GenAIExamples/CodeGen/docker_compose/intel/hpu/gaudi
```
## Quick Start Deployment
## Quick Start
This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).
@@ -40,30 +53,30 @@ This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).
```bash
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
export HOST_IP="your_external_ip_address"
export host_ip="your_external_ip_address"
# Replace with your Hugging Face Hub API token
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
# Optional: Configure proxy if needed
# export http_proxy="your_http_proxy"
# export https_proxy="your_https_proxy"
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
source ../../set_env.sh
# export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
source ../../../set_env.sh
```
_Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
For instance, edit set_env.sh to change the LLM model. The default
```
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
```
can be changed to a smaller model if needed:
```
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
```
2. **Start Services (vLLM Profile):**
```bash
@@ -92,7 +105,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
- **Other Services:** Same CPU-based services as the vLLM profile.
- **To Run:**
```bash
# Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
# Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
docker compose --profile codegen-gaudi-tgi up -d
```
@@ -102,18 +115,18 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
Key parameters are configured via environment variables set before running `docker compose up`.
| Environment Variable | Description | Default (Set Externally) |
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------- | ------------------------------------ |
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `llm-codegen-vllm-server`). Configured in `compose.yaml`. | http://codegen-vllm | tgi-server:9000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
| Environment Variable | Description | Default (Set Externally) |
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-32B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
Most of these parameters are defined in `set_env.sh`; you can either modify that file or override the variables by exporting them in your shell.
@@ -168,21 +181,21 @@ Check logs: `docker compose logs <service_name>`. Pay attention to `vllm-gaudi-s
### Run Validation Script/Commands
Use `curl` commands targeting the main service endpoints. Ensure `HOST_IP` is correctly set.
Use `curl` commands targeting the main service endpoints. Ensure `host_ip` is correctly set.
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**
```bash
# This command structure targets the OpenAI-compatible vLLM endpoint
curl http://${HOST_IP}:9000/v1/chat/completions \
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-H 'Content-Type: application/json' \
-d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
-d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
```
2. **Validate CodeGen Gateway (MegaService, default host port 7778):**
```bash
curl http://${HOST_IP}:7778/v1/codegen \
curl http://${host_ip}:7778/v1/codegen \
-H "Content-Type: application/json" \
-d '{"messages": "Implement a sorting algorithm in Python."}'
```
@@ -195,7 +208,7 @@ UI options are similar to the Xeon deployment.
### Gradio UI (Default)
Access the default Gradio UI:
`http://{HOST_IP}:5173`
`http://{host_ip}:5173`
_(Port `5173` is the default host mapping)_
![Gradio UI](../../../../assets/img/codegen_gradio_ui_main.png)
@@ -204,17 +217,17 @@ _(Port `5173` is the default host mapping)_
1. Modify `compose.yaml`: Swap Gradio service for Svelte (`codegen-gaudi-ui-server`), check port map (e.g., `5173:5173`).
2. Restart: `docker compose --profile <profile_name> up -d`
3. Access: `http://{HOST_IP}:5173`
3. Access: `http://{host_ip}:5173`
### React UI (Optional)
1. Modify `compose.yaml`: Swap Gradio service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`).
2. Restart: `docker compose --profile <profile_name> up -d`
3. Access: `http://{HOST_IP}:5174`
3. Access: `http://{host_ip}:5174`
### VS Code Extension (Optional)
Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${HOST_IP}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).
Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${host_ip}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).
## Troubleshooting
@@ -224,7 +237,7 @@ Use the `Neural Copilot` extension configured with the CodeGen backend URL: `htt
- Verify `runtime: habana` and volume mounts in `compose.yaml`.
- Gaudi initialization can take significant time and memory. Monitor resource usage.
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`, internet access, proxy settings. Check LLM service logs.
- **Connection Errors:** Verify `HOST_IP`, ports, and proxy settings. Use `docker ps` and check service logs.
- **Connection Errors:** Verify `host_ip`, ports, and proxy settings. Use `docker ps` and check service logs.
## Stopping the Application

View File

@@ -1,51 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null
export HOST_IP=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN"
fi
if [ -z "${HOST_IP}" ]; then
echo "Error: HOST_IP is not set. Please set HOST_IP first."
fi
export no_proxy=${no_proxy},${HOST_IP}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export LLM_SERVICE_PORT=9000
export LLM_ENDPOINT="http://${HOST_IP}:8028"
export LLM_SERVICE_HOST_IP=${HOST_IP}
export TGI_LLM_ENDPOINT="http://${HOST_IP}:8028"
export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${HOST_IP}
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:7778/v1/codegen"
export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export REDIS_URL="redis://${HOST_IP}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${HOST_IP}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export TEI_EMBEDDING_HOST_IP=${HOST_IP}
export TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${TEI_EMBEDDER_PORT}"
export DATAPREP_REDIS_PORT=6007
export DATAPREP_ENDPOINT="http://${HOST_IP}:${DATAPREP_REDIS_PORT}/v1/dataprep"
export LOGFLAG=false
export MODEL_CACHE=${model_cache:-"./data"}
export NUM_CARDS=1

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null
export host_ip=$(hostname -I | awk '{print $1}')
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN"
fi
if [ -z "${host_ip}" ]; then
echo "Error: host_ip is not set. Please set host_ip first."
fi
export no_proxy=${no_proxy},${host_ip}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
export LLM_SERVICE_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:8028"
export LLM_SERVICE_HOST_IP=${host_ip}
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export TEI_EMBEDDING_HOST_IP=${host_ip}
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export DATAPREP_REDIS_PORT=6007
export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
export LOGFLAG=false
export MODEL_CACHE="./data"
export NUM_CARDS=1
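A hypothetical usage of this new script, run from one of the compose directories that source it (`HUGGINGFACEHUB_API_TOKEN` must be exported first):
```bash
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon
# Path assumes the script lives at docker_compose/intel/set_env.sh,
# as sourced by the test scripts later in this diff.
source ../../set_env.sh
echo "${LLM_ENDPOINT}"   # e.g. http://<host_ip>:8028
```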

View File

@@ -1,33 +0,0 @@
# CodeGen E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_on_xeon.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```
On AMD ROCm with vLLM:
```bash
bash test_compose_vllm_on_rocm.sh
```

View File

@@ -10,11 +10,21 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export DATAPREP_REDIS_PORT=6007
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
source $WORKPATH/docker_compose/intel/set_env.sh
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy=${no_proxy},${ip_address}
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
@@ -44,6 +54,28 @@ function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export LLM_ENDPOINT="http://${ip_address}:8028"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
export NUM_CARDS=1
export host_ip=${ip_address}
export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export TEI_EMBEDDING_HOST_IP=${host_ip}
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
export INDEX_NAME="CodeGen"
# Start Docker Containers
docker compose --profile ${compose_profile} up -d | tee ${LOG_PATH}/start_services_with_compose.log

View File

@@ -35,7 +35,18 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
source set_env.sh
export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export CODEGEN_TGI_SERVICE_PORT=8028
export CODEGEN_TGI_LLM_ENDPOINT="http://${ip_address}:${CODEGEN_TGI_SERVICE_PORT}"
export CODEGEN_LLM_SERVICE_PORT=9000
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
export CODEGEN_BACKEND_SERVICE_PORT=7778
export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
export CODEGEN_UI_SERVICE_PORT=5173
export HOST_IP=${ip_address}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

View File

@@ -10,11 +10,20 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export DATAPREP_REDIS_PORT=6007
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
source $WORKPATH/docker_compose/intel/set_env.sh
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy=${no_proxy},${ip_address}
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
@@ -47,6 +56,25 @@ function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export LLM_ENDPOINT="http://${ip_address}:8028"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
export host_ip=${ip_address}
export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export TEI_EMBEDDING_HOST_IP=${host_ip}
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
# Start Docker Containers
docker compose --profile ${compose_profile} up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -34,7 +34,18 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
source set_env_vllm.sh
export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export CODEGEN_VLLM_SERVICE_PORT=8028
export CODEGEN_VLLM_ENDPOINT="http://${ip_address}:${CODEGEN_VLLM_SERVICE_PORT}"
export CODEGEN_LLM_SERVICE_PORT=9000
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
export CODEGEN_BACKEND_SERVICE_PORT=7778
export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
export CODEGEN_UI_SERVICE_PORT=5173
export HOST_IP=${ip_address}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env