Compare commits
21 Commits
suyue/perf
...
v1.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e380c18d56 | ||
|
|
ba0add7690 | ||
|
|
631f30dff8 | ||
|
|
0b55835259 | ||
|
|
17355b6719 | ||
|
|
63277feabb | ||
|
|
c956de0f51 | ||
|
|
82419b1818 | ||
|
|
ead526514d | ||
|
|
d1a8f0d07d | ||
|
|
48f577571b | ||
|
|
55815a3316 | ||
|
|
e0fc8d5405 | ||
|
|
c35e86cd08 | ||
|
|
c8f9c58148 | ||
|
|
4eb75e7b25 | ||
|
|
4804efc852 | ||
|
|
f8d60337e2 | ||
|
|
c8260d1ef4 | ||
|
|
599d301ee8 | ||
|
|
d3a84108af |
10
.github/CODEOWNERS
vendored
10
.github/CODEOWNERS
vendored
@@ -4,13 +4,13 @@
|
||||
/AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com
|
||||
/AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com
|
||||
/ChatQnA/ liang1.lv@intel.com letong.han@intel.com
|
||||
/CodeGen/ liang1.lv@intel.com
|
||||
/CodeTrans/ sihan.chen@intel.com
|
||||
/CodeGen/ liang1.lv@intel.com xinyao.wang@intel.com
|
||||
/CodeTrans/ sihan.chen@intel.com xinyao.wang@intel.com
|
||||
/DBQnA/ supriya.krishnamurthi@intel.com liang1.lv@intel.com
|
||||
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
|
||||
/DocSum/ letong.han@intel.com
|
||||
/DocSum/ letong.han@intel.com xinyao.wang@intel.com
|
||||
/EdgeCraftRAG/ yongbo.zhu@intel.com mingyuan.qi@intel.com
|
||||
/FaqGen/ yogesh.pandey@intel.com
|
||||
/FaqGen/ yogesh.pandey@intel.com xinyao.wang@intel.com
|
||||
/GraphRAG/ rita.brugarolas.brufau@intel.com abolfazl.shahbazi@intel.com
|
||||
/InstructionTuning/ xinyu.ye@intel.com kaokao.lv@intel.com
|
||||
/MultimodalQnA/ melanie.h.buehler@intel.com tiep.le@intel.com
|
||||
@@ -19,5 +19,5 @@
|
||||
/SearchQnA/ sihan.chen@intel.com letong.han@intel.com
|
||||
/Text2Image/ wenjiao.yue@intel.com xinyu.ye@intel.com
|
||||
/Translation/ liang1.lv@intel.com sihan.chen@intel.com
|
||||
/VideoQnA/ huiling.bao@intel.com
|
||||
/VideoQnA/ huiling.bao@intel.com xinyao.wang@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
|
||||
36
.github/workflows/_helm-e2e.yml
vendored
36
.github/workflows/_helm-e2e.yml
vendored
@@ -2,9 +2,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Helm Chart E2e Test For Call
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
permissions: read-all
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
@@ -137,28 +135,16 @@ jobs:
|
||||
env:
|
||||
example: ${{ inputs.example }}
|
||||
run: |
|
||||
if [[ ! "$example" =~ ^[a-zA-Z]{1,20}$ ]] || [[ "$example" =~ \.\. ]] || [[ "$example" == -* || "$example" == *- ]]; then
|
||||
echo "Error: Invalid input - only lowercase alphanumeric and internal hyphens allowed"
|
||||
exit 1
|
||||
fi
|
||||
# SAFE_PREFIX="kb-"
|
||||
CHART_NAME="${SAFE_PREFIX}$(echo "$example" | tr '[:upper:]' '[:lower:]')"
|
||||
RAND_SUFFIX=$(openssl rand -hex 2 | tr -dc 'a-f0-9')
|
||||
|
||||
cat <<EOF >> $GITHUB_ENV
|
||||
CHART_NAME=${CHART_NAME}
|
||||
RELEASE_NAME=${CHART_NAME}-$(date +%s)
|
||||
NAMESPACE=ns-${CHART_NAME}-${RAND_SUFFIX}
|
||||
ROLLOUT_TIMEOUT_SECONDS=600s
|
||||
TEST_TIMEOUT_SECONDS=600s
|
||||
KUBECTL_TIMEOUT_SECONDS=60s
|
||||
should_cleanup=false
|
||||
skip_validate=false
|
||||
CHART_FOLDER=${example}/kubernetes/helm
|
||||
EOF
|
||||
|
||||
echo "Generated safe variables:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- CHART_NAME: ${CHART_NAME}" >> $GITHUB_STEP_SUMMARY
|
||||
CHART_NAME="${example,,}" # CodeGen
|
||||
echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV
|
||||
echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
|
||||
echo "NAMESPACE=${CHART_NAME}-$(head -c 4 /dev/urandom | xxd -p)" >> $GITHUB_ENV
|
||||
echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
|
||||
echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
|
||||
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
echo "skip_validate=false" >> $GITHUB_ENV
|
||||
echo "CHART_FOLDER=${example}/kubernetes/helm" >> $GITHUB_ENV
|
||||
|
||||
- name: Helm install
|
||||
id: install
|
||||
|
||||
3
.github/workflows/pr-chart-e2e.yml
vendored
3
.github/workflows/pr-chart-e2e.yml
vendored
@@ -19,9 +19,6 @@ concurrency:
|
||||
jobs:
|
||||
job1:
|
||||
name: Get-Test-Matrix
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
|
||||
|
||||
@@ -96,21 +96,20 @@ flowchart LR
|
||||
|
||||
The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware.
|
||||
|
||||
| Category | Deployment Option | Description |
|
||||
| ------------------------------------------------------------------------------------------------------------------------------ | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| On-premise Deployments | Docker compose | [ChatQnA deployment on Xeon](./docker_compose/intel/cpu/xeon/README.md) |
|
||||
| | | [ChatQnA deployment on AI PC](./docker_compose/intel/cpu/aipc/README.md) |
|
||||
| | | [ChatQnA deployment on Gaudi](./docker_compose/intel/hpu/gaudi/README.md) |
|
||||
| | | [ChatQnA deployment on Nvidia GPU](./docker_compose/nvidia/gpu/README.md) |
|
||||
| | | [ChatQnA deployment on AMD ROCm](./docker_compose/amd/gpu/rocm/README.md) |
|
||||
| Cloud Platforms Deployment on AWS, GCP, Azure, IBM Cloud,Oracle Cloud, [Intel® Tiber™ AI Cloud](https://ai.cloud.intel.com/) | Docker Compose | [Getting Started Guide: Deploy the ChatQnA application across multiple cloud platforms](https://github.com/opea-project/docs/tree/main/getting-started/README.md) |
|
||||
| | Kubernetes | [Helm Charts](./kubernetes/helm/README.md) |
|
||||
| Automated Terraform Deployment on Cloud Service Providers | AWS | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using meta-llama/Meta-Llama-3-8B-Instruct ](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
|
||||
| | | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using TII Falcon2-11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) |
|
||||
| | GCP | [Terraform deployment on 5th Gen Intel Xeon with Intel AMX(support Confidential AI by using Intel® TDX](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
|
||||
| | Azure | [Terraform deployment on 4th/5th Gen Intel Xeon with Intel AMX & Intel TDX](https://github.com/intel/terraform-intel-azure-linux-vm/tree/main/examples/azure-gen-ai-xeon-opea-chatqna-tdx) |
|
||||
| | Intel Tiber AI Cloud | Coming Soon |
|
||||
| | Any Xeon based Ubuntu system | [ChatQnA Ansible Module for Ubuntu 20.04](https://github.com/intel/optimized-cloud-recipes/tree/main/recipes/ai-opea-chatqna-xeon). Use this if you are not using Terraform and have provisioned your system either manually or with another tool, including directly on bare metal. |
|
||||
| Category | Deployment Option | Description |
|
||||
| ----------------------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| On-premise Deployments | Docker compose | [ChatQnA deployment on Xeon](./docker_compose/intel/cpu/xeon) |
|
||||
| | | [ChatQnA deployment on AI PC](./docker_compose/intel/cpu/aipc) |
|
||||
| | | [ChatQnA deployment on Gaudi](./docker_compose/intel/hpu/gaudi) |
|
||||
| | | [ChatQnA deployment on Nvidia GPU](./docker_compose/nvidia/gpu) |
|
||||
| | | [ChatQnA deployment on AMD ROCm](./docker_compose/amd/gpu/rocm) |
|
||||
| | Kubernetes | [Helm Charts](./kubernetes/helm) |
|
||||
| Cloud Service Providers | AWS | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using meta-llama/Meta-Llama-3-8B-Instruct ](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
|
||||
| | | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using TII Falcon2-11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) |
|
||||
| | GCP | [Terraform deployment on 5th Gen Intel Xeon with Intel AMX(support Confidential AI by using Intel® TDX](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
|
||||
| | Azure | [Terraform deployment on 4th/5th Gen Intel Xeon with Intel AMX & Intel TDX](https://github.com/intel/terraform-intel-azure-linux-vm/tree/main/examples/azure-gen-ai-xeon-opea-chatqna-tdx) |
|
||||
| | Intel Tiber AI Cloud | Coming Soon |
|
||||
| | Any Xeon based Ubuntu system | [ChatQnA Ansible Module for Ubuntu 20.04](https://github.com/intel/optimized-cloud-recipes/tree/main/recipes/ai-opea-chatqna-xeon). Use this if you are not using Terraform and have provisioned your system either manually or with another tool, including directly on bare metal. |
|
||||
|
||||
## Monitor and Tracing
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ deploy:
|
||||
|
||||
llm:
|
||||
engine: vllm # or tgi
|
||||
model_id: "meta-llama/Llama-3.1-8B-Instruct" # mandatory
|
||||
model_id: "meta-llama/Meta-Llama-3-8B-Instruct" # mandatory
|
||||
replicaCount:
|
||||
with_teirerank: [7, 15, 31, 63] # When teirerank.enabled is True
|
||||
without_teirerank: [8, 16, 32, 64] # When teirerank.enabled is False
|
||||
|
||||
@@ -46,24 +46,13 @@ dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices"
|
||||
|
||||
|
||||
# Define the functions that will be used in the app
|
||||
|
||||
|
||||
def add_to_history(prompt, history):
|
||||
history.append([prompt["text"], ""])
|
||||
return history, ""
|
||||
|
||||
|
||||
def conversation_history(prompt, index, use_agent, history):
|
||||
print(f"Generating code for prompt: {prompt} using index: {index} and use_agent is {use_agent}")
|
||||
history = add_to_history(prompt, history)[0]
|
||||
response_generator = generate_code(prompt["text"], index, use_agent)
|
||||
history.append([prompt, ""])
|
||||
response_generator = generate_code(prompt, index, use_agent)
|
||||
for token in response_generator:
|
||||
history[-1][-1] += token
|
||||
yield history, ""
|
||||
|
||||
|
||||
def clear_history():
|
||||
return ""
|
||||
yield history
|
||||
|
||||
|
||||
def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100):
|
||||
@@ -298,32 +287,19 @@ def get_file_names(files):
|
||||
# Define UI components
|
||||
with gr.Blocks() as ui:
|
||||
with gr.Tab("Code Generation"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=2):
|
||||
gr.Markdown("### Generate Code from Natural Language")
|
||||
chatbot = gr.Chatbot(label="Chat History")
|
||||
prompt_input = gr.Textbox(label="Enter your query")
|
||||
with gr.Column():
|
||||
with gr.Row(equal_height=True):
|
||||
database_dropdown = gr.Dropdown(choices=get_indices(), label="Select Index", value="None", scale=10)
|
||||
db_refresh_button = gr.Button("Refresh Dropdown", scale=0.1)
|
||||
db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown)
|
||||
use_agent = gr.Checkbox(label="Use Agent", container=False)
|
||||
|
||||
with gr.Column(scale=9):
|
||||
gr.Markdown("### Generate Code from Natural Language")
|
||||
chatbot = gr.Chatbot(label="Chat History")
|
||||
with gr.Row(equal_height=True):
|
||||
with gr.Column(scale=8):
|
||||
prompt_input = gr.MultimodalTextbox(
|
||||
show_label=False, interactive=True, placeholder="Enter your query", sources=[]
|
||||
)
|
||||
with gr.Column(scale=1, min_width=150):
|
||||
with gr.Row(elem_id="buttons") as button_row:
|
||||
clear_btn = gr.Button(value="🗑️ Clear", interactive=True)
|
||||
clear_btn.click(clear_history, None, chatbot)
|
||||
|
||||
prompt_input.submit(add_to_history, inputs=[prompt_input, chatbot], outputs=[chatbot, prompt_input])
|
||||
|
||||
prompt_input.submit(
|
||||
conversation_history,
|
||||
inputs=[prompt_input, database_dropdown, use_agent, chatbot],
|
||||
outputs=[chatbot, prompt_input],
|
||||
generate_button = gr.Button("Generate Code")
|
||||
generate_button.click(
|
||||
conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot
|
||||
)
|
||||
|
||||
with gr.Tab("Resource Management"):
|
||||
@@ -339,7 +315,7 @@ with gr.Blocks() as ui:
|
||||
)
|
||||
with gr.Column(scale=3):
|
||||
file_upload = gr.File(label="Upload Files", file_count="multiple")
|
||||
url_input = gr.Textbox(label="Media to be ingested. Append URL's in a new line (Shift + Enter)")
|
||||
url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)")
|
||||
upload_button = gr.Button("Upload", variant="primary")
|
||||
upload_status = gr.Textbox(label="Upload Status")
|
||||
file_upload.change(get_file_names, inputs=file_upload, outputs=url_input)
|
||||
|
||||
@@ -8,14 +8,14 @@
|
||||
# which can be used to connect to the server from the Internet. It must be specified in the EXTERNAL_HOST_IP variable.
|
||||
# If the server is used only on the internal network or has a direct external address,
|
||||
# specify it in HOST_IP and in EXTERNAL_HOST_IP.
|
||||
export HOST_IP=${ip_address}
|
||||
export EXTERNAL_HOST_IP=${ip_address}
|
||||
export HOST_IP=''
|
||||
export EXTERNAL_HOST_IP=''
|
||||
|
||||
### Model ID
|
||||
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
|
||||
### The port of the TGI service. On this port, the TGI service will accept connections
|
||||
export CODETRANS_TGI_SERVICE_PORT=8008
|
||||
export CODETRANS_TGI_SERVICE_PORT=18156
|
||||
|
||||
### The endpoint of the TGI service to which requests to this service will be sent (formed from previously set variables)
|
||||
export CODETRANS_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_TGI_SERVICE_PORT}"
|
||||
@@ -24,7 +24,7 @@ export CODETRANS_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_TGI_SERVICE_POR
|
||||
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
### The port of the LLM service. On this port, the LLM service will accept connections
|
||||
export CODETRANS_LLM_SERVICE_PORT=9000
|
||||
export CODETRANS_LLM_SERVICE_PORT=18157
|
||||
|
||||
### The IP address or domain name of the server for CodeTrans MegaService
|
||||
export CODETRANS_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
@@ -36,7 +36,7 @@ export CODETRANS_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CODETRANS_FRONTEND_SERVICE_IP=${HOST_IP}
|
||||
|
||||
### The port of the frontend service
|
||||
export CODETRANS_FRONTEND_SERVICE_PORT=5173
|
||||
export CODETRANS_FRONTEND_SERVICE_PORT=18155
|
||||
|
||||
### Name of GenAI service for route requests to application
|
||||
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
|
||||
@@ -45,10 +45,10 @@ export CODETRANS_BACKEND_SERVICE_NAME=codetrans
|
||||
export CODETRANS_BACKEND_SERVICE_IP=${HOST_IP}
|
||||
|
||||
### The port of the backend service
|
||||
export CODETRANS_BACKEND_SERVICE_PORT=7777
|
||||
export CODETRANS_BACKEND_SERVICE_PORT=18154
|
||||
|
||||
### The port of the Nginx reverse proxy for application
|
||||
export CODETRANS_NGINX_PORT=8088
|
||||
export CODETRANS_NGINX_PORT=18153
|
||||
|
||||
### Endpoint of the backend service
|
||||
export CODETRANS_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"
|
||||
|
||||
@@ -8,14 +8,14 @@
|
||||
# which can be used to connect to the server from the Internet. It must be specified in the EXTERNAL_HOST_IP variable.
|
||||
# If the server is used only on the internal network or has a direct external address,
|
||||
# specify it in HOST_IP and in EXTERNAL_HOST_IP.
|
||||
export HOST_IP=${ip_address}
|
||||
export EXTERNAL_HOST_IP=${ip_address}
|
||||
export HOST_IP=''
|
||||
export EXTERNAL_HOST_IP=''
|
||||
|
||||
### Model ID
|
||||
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
|
||||
### The port of the vLLM service. On this port, the vLLM service will accept connections
|
||||
export CODETRANS_VLLM_SERVICE_PORT=8008
|
||||
export CODETRANS_VLLM_SERVICE_PORT=18156
|
||||
|
||||
### The endpoint of the vLLM service to which requests to this service will be sent (formed from previously set variables)
|
||||
export CODETRANS_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_VLLM_SERVICE_PORT}"
|
||||
@@ -24,7 +24,7 @@ export CODETRANS_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_VLLM_SERVICE_PORT}"
|
||||
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
### The port of the LLM service. On this port, the LLM service will accept connections
|
||||
export CODETRANS_LLM_SERVICE_PORT=9000
|
||||
export CODETRANS_LLM_SERVICE_PORT=18157
|
||||
|
||||
### The IP address or domain name of the server for CodeTrans MegaService
|
||||
export CODETRANS_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
@@ -36,7 +36,7 @@ export CODETRANS_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CODETRANS_FRONTEND_SERVICE_IP=${HOST_IP}
|
||||
|
||||
### The port of the frontend service
|
||||
export CODETRANS_FRONTEND_SERVICE_PORT=5173
|
||||
export CODETRANS_FRONTEND_SERVICE_PORT=18155
|
||||
|
||||
### Name of GenAI service for route requests to application
|
||||
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
|
||||
@@ -45,10 +45,10 @@ export CODETRANS_BACKEND_SERVICE_NAME=codetrans
|
||||
export CODETRANS_BACKEND_SERVICE_IP=${HOST_IP}
|
||||
|
||||
### The port of the backend service
|
||||
export CODETRANS_BACKEND_SERVICE_PORT=7777
|
||||
export CODETRANS_BACKEND_SERVICE_PORT=18154
|
||||
|
||||
### The port of the Nginx reverse proxy for application
|
||||
export CODETRANS_NGINX_PORT=8088
|
||||
export CODETRANS_NGINX_PORT=18153
|
||||
|
||||
### Endpoint of the backend service
|
||||
export CODETRANS_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
# CodeTrans E2E test scripts
|
||||
|
||||
## Set the required environment variable
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
## Run test
|
||||
|
||||
On Intel Xeon with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_tgi_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_tgi_on_gaudi.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_gaudi.sh
|
||||
```
|
||||
|
||||
On AMD ROCm with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_rocm.sh
|
||||
```
|
||||
|
||||
On AMD ROCm with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_vllm_on_rocm.sh
|
||||
```
|
||||
@@ -42,12 +42,25 @@ function build_docker_images() {
|
||||
}
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export LLM_ENDPOINT="http://${ip_address}:8008"
|
||||
export LLM_COMPONENT_NAME="OpeaTextGenService"
|
||||
export NUM_CARDS=1
|
||||
export BLOCK_SIZE=128
|
||||
export MAX_NUM_SEQS=256
|
||||
export MAX_SEQ_LEN_TO_CAPTURE=2048
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
|
||||
export FRONTEND_SERVICE_IP=${ip_address}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_NAME=codetrans
|
||||
export BACKEND_SERVICE_IP=${ip_address}
|
||||
export BACKEND_SERVICE_PORT=7777
|
||||
export NGINX_PORT=80
|
||||
export host_ip=${ip_address}
|
||||
source set_env.sh
|
||||
cd intel/hpu/gaudi
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -42,7 +42,21 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm/
|
||||
source set_env.sh
|
||||
export CODETRANS_TGI_SERVICE_PORT=8008
|
||||
export CODETRANS_LLM_SERVICE_PORT=9000
|
||||
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
export CODETRANS_TGI_LLM_ENDPOINT="http://${ip_address}:${CODETRANS_TGI_SERVICE_PORT}"
|
||||
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CODETRANS_MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export CODETRANS_LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export CODETRANS_FRONTEND_SERVICE_IP=${ip_address}
|
||||
export CODETRANS_FRONTEND_SERVICE_PORT=5173
|
||||
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
|
||||
export CODETRANS_BACKEND_SERVICE_IP=${ip_address}
|
||||
export CODETRANS_BACKEND_SERVICE_PORT=7777
|
||||
export CODETRANS_NGINX_PORT=8088
|
||||
export CODETRANS_BACKEND_SERVICE_URL="http://${ip_address}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"
|
||||
export HOST_IP=${ip_address}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -44,13 +44,21 @@ function build_docker_images() {
|
||||
}
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export LLM_ENDPOINT="http://${ip_address}:8008"
|
||||
export LLM_COMPONENT_NAME="OpeaTextGenService"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
|
||||
export FRONTEND_SERVICE_IP=${ip_address}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_NAME=codetrans
|
||||
export BACKEND_SERVICE_IP=${ip_address}
|
||||
export BACKEND_SERVICE_PORT=7777
|
||||
export NGINX_PORT=80
|
||||
export host_ip=${ip_address}
|
||||
source set_env.sh
|
||||
cd intel/cpu/xeon/
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -40,13 +40,21 @@ function build_docker_images() {
|
||||
}
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
|
||||
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export LLM_ENDPOINT="http://${ip_address}:8008"
|
||||
export LLM_COMPONENT_NAME="OpeaTextGenService"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
|
||||
export FRONTEND_SERVICE_IP=${ip_address}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_NAME=codetrans
|
||||
export BACKEND_SERVICE_IP=${ip_address}
|
||||
export BACKEND_SERVICE_PORT=7777
|
||||
export NGINX_PORT=80
|
||||
export host_ip=${ip_address}
|
||||
source set_env.sh
|
||||
cd intel/hpu/gaudi/
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -40,13 +40,21 @@ function build_docker_images() {
|
||||
}
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export LLM_ENDPOINT="http://${ip_address}:8008"
|
||||
export LLM_COMPONENT_NAME="OpeaTextGenService"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
|
||||
export FRONTEND_SERVICE_IP=${ip_address}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_NAME=codetrans
|
||||
export BACKEND_SERVICE_IP=${ip_address}
|
||||
export BACKEND_SERVICE_PORT=7777
|
||||
export NGINX_PORT=80
|
||||
export host_ip=${ip_address}
|
||||
source set_env.sh
|
||||
cd intel/cpu/xeon/
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -40,7 +40,22 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm/
|
||||
source set_env_vllm.sh
|
||||
export HOST_IP=${ip_address}
|
||||
export CODETRANS_VLLM_SERVICE_PORT=8008
|
||||
export CODETRANS_LLM_SERVICE_PORT=9000
|
||||
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
export CODETRANS_LLM_ENDPOINT="http://${ip_address}:${CODETRANS_VLLM_SERVICE_PORT}"
|
||||
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CODETRANS_MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export CODETRANS_LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export CODETRANS_FRONTEND_SERVICE_IP=${ip_address}
|
||||
export CODETRANS_FRONTEND_SERVICE_PORT=5173
|
||||
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
|
||||
export CODETRANS_BACKEND_SERVICE_IP=${ip_address}
|
||||
export CODETRANS_BACKEND_SERVICE_PORT=7777
|
||||
export CODETRANS_NGINX_PORT=8088
|
||||
export CODETRANS_BACKEND_SERVICE_URL="http://${ip_address}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"
|
||||
export HOST_IP=${ip_address}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP=''
|
||||
export DOCSUM_MAX_INPUT_TOKENS="2048"
|
||||
export DOCSUM_MAX_TOTAL_TOKENS="4096"
|
||||
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP=''
|
||||
export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export DOCSUM_MAX_INPUT_TOKENS=2048
|
||||
export DOCSUM_MAX_TOTAL_TOKENS=4096
|
||||
|
||||
@@ -10,7 +10,7 @@ export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0
|
||||
export http_proxy=$http_proxy
|
||||
export https_proxy=$https_proxy
|
||||
export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
|
||||
export LLM_ENDPOINT_PORT=8008
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
@@ -20,12 +20,10 @@ export MAX_TOTAL_TOKENS=2048
|
||||
export LLM_PORT=9000
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" # OpeaDocSumTgi
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
|
||||
|
||||
export LOGFLAG=True
|
||||
|
||||
@@ -16,150 +16,3 @@ helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGI
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on AMD ROCm using Helm charts from the binary Helm repository
|
||||
|
||||
```bash
|
||||
mkdir ~/docsum-k8s-install && cd ~/docsum-k8s-install
|
||||
```
|
||||
|
||||
### Cloning repos
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
### Go to the installation directory
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/kubernetes/helm
|
||||
```
|
||||
|
||||
### Setting system variables
|
||||
|
||||
```bash
|
||||
export HFTOKEN="your_huggingface_token"
|
||||
export MODELDIR="/mnt/opea-models"
|
||||
export MODELNAME="Intel/neural-chat-7b-v3-3"
|
||||
```
|
||||
|
||||
### Setting variables in Values files
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- TENSOR_PARALLEL_SIZE - must match the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
#### If ROCm TGI used
|
||||
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- extraCmdArgs: [ "--num-shard","1" ] - replace "1" with the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
### Installing the Helm Chart
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values rocm-values.yaml
|
||||
```
|
||||
|
||||
#### If ROCm TGI used
|
||||
```bash
|
||||
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on AMD ROCm using Helm charts from Git repositories
|
||||
|
||||
### Creating working dirs
|
||||
|
||||
```bash
|
||||
mkdir ~/docsum-k8s-install && cd ~/docsum-k8s-install
|
||||
```
|
||||
|
||||
### Cloning repos
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
git clone https://github.com/opea-project/GenAIInfra.git
|
||||
```
|
||||
|
||||
### Go to the installation directory
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/kubernetes/helm
|
||||
```
|
||||
|
||||
### Setting system variables
|
||||
|
||||
```bash
|
||||
export HFTOKEN="your_huggingface_token"
|
||||
export MODELDIR="/mnt/opea-models"
|
||||
export MODELNAME="Intel/neural-chat-7b-v3-3"
|
||||
```
|
||||
|
||||
### Setting variables in Values files
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- TENSOR_PARALLEL_SIZE - must match the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
#### If ROCm TGI used
|
||||
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- extraCmdArgs: [ "--num-shard","1" ] - replace "1" with the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
### Installing the Helm Chart
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
cd ~/docsum-k8s-install/GenAIInfra/helm-charts
|
||||
./update_dependency.sh
|
||||
helm dependency update docsum
|
||||
helm upgrade --install docsum docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values ../../GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
|
||||
```
|
||||
|
||||
#### If ROCm TGI used
|
||||
```bash
|
||||
cd ~/docsum-k8s-install/GenAIInfra/helm-charts
|
||||
./update_dependency.sh
|
||||
helm dependency update docsum
|
||||
helm upgrade --install docsum docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values ../../GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
tgi:
|
||||
enabled: true
|
||||
accelDevice: "rocm"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/text-generation-inference
|
||||
tag: "2.4.1-rocm"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
USE_FLASH_ATTENTION: "false"
|
||||
FLASH_ATTENTION_RECOMPUTE: "false"
|
||||
HIP_VISIBLE_DEVICES: "0"
|
||||
MAX_BATCH_SIZE: "4"
|
||||
extraCmdArgs: [ "--num-shard","1" ]
|
||||
resources:
|
||||
limits:
|
||||
amd.com/gpu: "1"
|
||||
requests:
|
||||
cpu: 1
|
||||
memory: 16Gi
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: false
|
||||
runAsUser: 0
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_PTRACE
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
startupProbe:
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
|
||||
llm-uservice:
|
||||
DOCSUM_BACKEND: "TGI"
|
||||
retryTimeoutSeconds: 720
|
||||
|
||||
vllm:
|
||||
enabled: false
|
||||
@@ -1,40 +0,0 @@
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
tgi:
|
||||
enabled: false
|
||||
|
||||
llm-uservice:
|
||||
DOCSUM_BACKEND: "vLLM"
|
||||
retryTimeoutSeconds: 720
|
||||
|
||||
vllm:
|
||||
enabled: true
|
||||
accelDevice: "rocm"
|
||||
image:
|
||||
repository: opea/vllm-rocm
|
||||
tag: latest
|
||||
env:
|
||||
HIP_VISIBLE_DEVICES: "0"
|
||||
TENSOR_PARALLEL_SIZE: "1"
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: "1"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0"
|
||||
VLLM_USE_TRITON_FLASH_ATTN: "0"
|
||||
VLLM_WORKER_MULTIPROC_METHOD: "spawn"
|
||||
PYTORCH_JIT: "0"
|
||||
HF_HOME: "/data"
|
||||
extraCmd:
|
||||
command: [ "python3", "/workspace/api_server.py" ]
|
||||
extraCmdArgs: [ "--swap-space", "16",
|
||||
"--disable-log-requests",
|
||||
"--dtype", "float16",
|
||||
"--num-scheduler-steps", "1",
|
||||
"--distributed-executor-backend", "mp" ]
|
||||
resources:
|
||||
limits:
|
||||
amd.com/gpu: "1"
|
||||
startupProbe:
|
||||
failureThreshold: 180
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: false
|
||||
runAsUser: 0
|
||||
@@ -1,45 +0,0 @@
|
||||
# DocSum E2E test scripts
|
||||
|
||||
## Set the required environment variable
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
## Run test
|
||||
|
||||
On Intel Xeon with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_tgi_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_gaudi.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_tgi_on_gaudi.sh
|
||||
```
|
||||
|
||||
On AMD ROCm with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_rocm.sh
|
||||
```
|
||||
|
||||
On AMD ROCm with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_vllm_on_rocm.sh
|
||||
```
|
||||
@@ -10,22 +10,35 @@ export http_proxy=$http_proxy
|
||||
export https_proxy=$https_proxy
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export no_proxy="${no_proxy},${host_ip}"
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
source $WORKPATH/docker_compose/set_env.sh
|
||||
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_ENDPOINT_PORT=8008
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export NUM_CARDS=1
|
||||
export BLOCK_SIZE=128
|
||||
export MAX_NUM_SEQS=256
|
||||
export MAX_SEQ_LEN_TO_CAPTURE=2048
|
||||
export MAX_INPUT_TOKENS=2048
|
||||
export MAX_TOTAL_TOKENS=4096
|
||||
export LLM_PORT=9000
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
|
||||
export LOGFLAG=True
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
|
||||
|
||||
# Get the root folder of the current script
|
||||
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
@@ -14,8 +14,21 @@ export MODEL_CACHE=${model_cache:-"./data"}
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export host_ip=${ip_address}
|
||||
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env.sh
|
||||
export DOCSUM_MAX_INPUT_TOKENS="2048"
|
||||
export DOCSUM_MAX_TOTAL_TOKENS="4096"
|
||||
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export DOCSUM_TGI_SERVICE_PORT="8008"
|
||||
export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
|
||||
export DOCSUM_HUGGINGFACEHUB_API_TOKEN=''
|
||||
export DOCSUM_WHISPER_PORT="7066"
|
||||
export ASR_SERVICE_HOST_IP="${HOST_IP}"
|
||||
export DOCSUM_LLM_SERVER_PORT="9000"
|
||||
export DOCSUM_BACKEND_SERVER_PORT="18072"
|
||||
export DOCSUM_FRONTEND_PORT="18073"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||
|
||||
function build_docker_images() {
|
||||
opea_branch=${opea_branch:-"main"}
|
||||
@@ -116,7 +129,7 @@ function validate_microservices() {
|
||||
# whisper microservice
|
||||
ulimit -s 65536
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \
|
||||
"${host_ip}:${DOCSUM_WHISPER_PORT}/v1/asr" \
|
||||
'{"asr_result":"well"}' \
|
||||
"whisper-service" \
|
||||
"whisper-service" \
|
||||
@@ -124,7 +137,7 @@ function validate_microservices() {
|
||||
|
||||
# tgi for llm service
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}/generate" \
|
||||
"${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/generate" \
|
||||
"generated_text" \
|
||||
"docsum-tgi-service" \
|
||||
"docsum-tgi-service" \
|
||||
@@ -132,7 +145,7 @@ function validate_microservices() {
|
||||
|
||||
# llm microservice
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
|
||||
"text" \
|
||||
"docsum-llm-server" \
|
||||
"docsum-llm-server" \
|
||||
@@ -145,7 +158,7 @@ function validate_megaservice() {
|
||||
local DOCKER_NAME="docsum-backend-server"
|
||||
local EXPECTED_RESULT="[DONE]"
|
||||
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
|
||||
local URL="${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||
local URL="${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||
local DATA_TYPE="type=text"
|
||||
|
||||
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
|
||||
@@ -175,7 +188,7 @@ function validate_megaservice_json() {
|
||||
echo ""
|
||||
echo ">>> Checking text data with Content-Type: application/json"
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"[DONE]" \
|
||||
"docsum-backend-server" \
|
||||
"docsum-backend-server" \
|
||||
@@ -183,7 +196,7 @@ function validate_megaservice_json() {
|
||||
|
||||
echo ">>> Checking audio data"
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"[DONE]" \
|
||||
"docsum-backend-server" \
|
||||
"docsum-backend-server" \
|
||||
@@ -191,7 +204,7 @@ function validate_megaservice_json() {
|
||||
|
||||
echo ">>> Checking video data"
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"[DONE]" \
|
||||
"docsum-backend-server" \
|
||||
"docsum-backend-server" \
|
||||
|
||||
@@ -10,18 +10,30 @@ export http_proxy=$http_proxy
|
||||
export https_proxy=$https_proxy
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export no_proxy="${no_proxy},${host_ip}"
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
source $WORKPATH/docker_compose/set_env.sh
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_ENDPOINT_PORT=8008
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export MAX_INPUT_TOKENS=2048
|
||||
export MAX_TOTAL_TOKENS=4096
|
||||
export LLM_PORT=9000
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
|
||||
export LOGFLAG=True
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
|
||||
# Get the root folder of the current script
|
||||
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
@@ -9,20 +9,32 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
export http_proxy=$http_proxy
|
||||
export https_proxy=$https_proxy
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export no_proxy="${no_proxy},${host_ip}"
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
source $WORKPATH/docker_compose/set_env.sh
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_ENDPOINT_PORT=8008
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export MAX_INPUT_TOKENS=2048
|
||||
export MAX_TOTAL_TOKENS=4096
|
||||
|
||||
export LLM_PORT=9000
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
|
||||
export LOGFLAG=True
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
|
||||
|
||||
# Get the root folder of the current script
|
||||
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
@@ -9,20 +9,31 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
export http_proxy=$http_proxy
|
||||
export https_proxy=$https_proxy
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export no_proxy="${no_proxy},${host_ip}"
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
source $WORKPATH/docker_compose/set_env.sh
|
||||
export MODEL_CACHE=${model_cache:-"./data"}
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_ENDPOINT_PORT=8008
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export MAX_INPUT_TOKENS=2048
|
||||
export MAX_TOTAL_TOKENS=4096
|
||||
|
||||
export LLM_PORT=9000
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
|
||||
export LOGFLAG=True
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
|
||||
# Get the root folder of the current script
|
||||
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
@@ -16,7 +16,21 @@ WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_vllm.sh
|
||||
export host_ip=${ip_address}
|
||||
export HOST_IP=${ip_address}
|
||||
export EXTERNAL_HOST_IP=${ip_address}
|
||||
export DOCSUM_HUGGINGFACEHUB_API_TOKEN="${HUGGINGFACEHUB_API_TOKEN}"
|
||||
export DOCSUM_MAX_INPUT_TOKENS=2048
|
||||
export DOCSUM_MAX_TOTAL_TOKENS=4096
|
||||
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export DOCSUM_VLLM_SERVICE_PORT="8008"
|
||||
export DOCSUM_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}"
|
||||
export DOCSUM_WHISPER_PORT="7066"
|
||||
export ASR_SERVICE_HOST_IP="${HOST_IP}"
|
||||
export DOCSUM_LLM_SERVER_PORT="9000"
|
||||
export DOCSUM_BACKEND_SERVER_PORT="18072"
|
||||
export DOCSUM_FRONTEND_PORT="18073"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${EXTERNAL_HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||
|
||||
function build_docker_images() {
|
||||
opea_branch=${opea_branch:-"main"}
|
||||
@@ -116,7 +130,7 @@ function validate_microservices() {
|
||||
# whisper microservice
|
||||
ulimit -s 65536
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \
|
||||
"${host_ip}:${DOCSUM_WHISPER_PORT}/v1/asr" \
|
||||
'{"asr_result":"well"}' \
|
||||
"whisper-service" \
|
||||
"whisper-service" \
|
||||
@@ -124,7 +138,7 @@ function validate_microservices() {
|
||||
|
||||
# vLLM service
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \
|
||||
"${host_ip}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \
|
||||
"content" \
|
||||
"docsum-vllm-service" \
|
||||
"docsum-vllm-service" \
|
||||
@@ -132,7 +146,7 @@ function validate_microservices() {
|
||||
|
||||
# llm microservice
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
|
||||
"text" \
|
||||
"docsum-llm-server" \
|
||||
"docsum-llm-server" \
|
||||
@@ -145,7 +159,7 @@ function validate_megaservice() {
|
||||
local DOCKER_NAME="docsum-backend-server"
|
||||
local EXPECTED_RESULT="[DONE]"
|
||||
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
|
||||
local URL="${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||
local URL="${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||
local DATA_TYPE="type=text"
|
||||
|
||||
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
|
||||
@@ -175,7 +189,7 @@ function validate_megaservice_json() {
|
||||
echo ""
|
||||
echo ">>> Checking text data with Content-Type: application/json"
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"[DONE]" \
|
||||
"docsum-backend-server" \
|
||||
"docsum-backend-server" \
|
||||
@@ -183,7 +197,7 @@ function validate_megaservice_json() {
|
||||
|
||||
echo ">>> Checking audio data"
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"[DONE]" \
|
||||
"docsum-backend-server" \
|
||||
"docsum-backend-server" \
|
||||
@@ -191,7 +205,7 @@ function validate_megaservice_json() {
|
||||
|
||||
echo ">>> Checking video data"
|
||||
validate_services \
|
||||
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
|
||||
"[DONE]" \
|
||||
"docsum-backend-server" \
|
||||
"docsum-backend-server" \
|
||||
|
||||
@@ -14,19 +14,16 @@ services:
|
||||
image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
|
||||
edgecraftrag-server:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: ./Dockerfile.server
|
||||
extends: edgecraftrag
|
||||
image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
|
||||
edgecraftrag-ui:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: ./ui/docker/Dockerfile.ui
|
||||
extends: edgecraftrag
|
||||
image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
|
||||
edgecraftrag-ui-gradio:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: ./ui/docker/Dockerfile.gradio
|
||||
extends: edgecraftrag
|
||||
image: ${REGISTRY:-opea}/edgecraftrag-ui-gradio:${TAG:-latest}
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
# Translation E2E test scripts
|
||||
|
||||
## Set the required environment variable
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
## Run test
|
||||
|
||||
On Intel Xeon:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_xeon.sh
|
||||
```
|
||||
@@ -192,7 +192,7 @@ def configure_rerank(values, with_rerank, deploy_config, example_type, node_sele
|
||||
values["teirerank"]["nodeSelector"] = {key: value for key, value in node_selector.items()}
|
||||
else:
|
||||
if example_type == "chatqna":
|
||||
values["CHATQNA_TYPE"] = "CHATQNA_NO_RERANK"
|
||||
values["image"] = {"repository": "opea/chatqna-without-rerank"}
|
||||
if "teirerank" not in values:
|
||||
values["teirerank"] = {"enabled": False}
|
||||
elif "enabled" not in values["teirerank"]:
|
||||
|
||||
@@ -143,7 +143,7 @@ def pull_helm_chart(chart_pull_url, version, chart_name):
|
||||
return untar_dir
|
||||
|
||||
|
||||
def main(yaml_file, target_node=None, test_mode="oob", clean_up=True):
|
||||
def main(yaml_file, target_node=None, test_mode="oob"):
|
||||
"""Main function to process deployment configuration.
|
||||
|
||||
Args:
|
||||
@@ -278,9 +278,6 @@ def main(yaml_file, target_node=None, test_mode="oob", clean_up=True):
|
||||
chart_dir,
|
||||
]
|
||||
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
||||
print("Show deploy logs...")
|
||||
print(result.stdout)
|
||||
print("End of show deploy logs.")
|
||||
|
||||
match = re.search(r"values_file_path: (\S+)", result.stdout)
|
||||
if match:
|
||||
@@ -309,9 +306,6 @@ def main(yaml_file, target_node=None, test_mode="oob", clean_up=True):
|
||||
"--update-service",
|
||||
]
|
||||
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
||||
print("Show deploy logs...")
|
||||
print(result.stdout)
|
||||
print("End of show deploy logs.")
|
||||
if result.returncode != 0:
|
||||
print(f"Update failed for {node} nodes configuration with {param_name} {batch_param}")
|
||||
break # Skip remaining {param_name} for this node
|
||||
@@ -378,48 +372,36 @@ def main(yaml_file, target_node=None, test_mode="oob", clean_up=True):
|
||||
os.remove(temp_config_file)
|
||||
|
||||
finally:
|
||||
if clean_up:
|
||||
# Uninstall the deployment
|
||||
print(f"\nUninstalling deployment for {node} nodes...")
|
||||
cmd = [
|
||||
python_cmd,
|
||||
"deploy.py",
|
||||
"--chart-name",
|
||||
chart_name,
|
||||
"--namespace",
|
||||
namespace,
|
||||
"--uninstall",
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(cmd, check=True)
|
||||
if result.returncode != 0:
|
||||
print(f"Failed to uninstall deployment for {node} nodes")
|
||||
except Exception as e:
|
||||
print(f"Error while uninstalling deployment for {node} nodes: {str(e)}")
|
||||
# Uninstall the deployment
|
||||
print(f"\nUninstalling deployment for {node} nodes...")
|
||||
cmd = [
|
||||
python_cmd,
|
||||
"deploy.py",
|
||||
"--chart-name",
|
||||
chart_name,
|
||||
"--namespace",
|
||||
namespace,
|
||||
"--uninstall",
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(cmd, check=True)
|
||||
if result.returncode != 0:
|
||||
print(f"Failed to uninstall deployment for {node} nodes")
|
||||
except Exception as e:
|
||||
print(f"Error while uninstalling deployment for {node} nodes: {str(e)}")
|
||||
|
||||
# Delete labels for current node configuration
|
||||
print(f"Deleting labels for {node} nodes...")
|
||||
cmd = [
|
||||
python_cmd,
|
||||
"deploy.py",
|
||||
"--chart-name",
|
||||
chart_name,
|
||||
"--num-nodes",
|
||||
str(node),
|
||||
"--delete-label",
|
||||
]
|
||||
if current_node_names:
|
||||
cmd.extend(["--node-names"] + current_node_names)
|
||||
# Delete labels for current node configuration
|
||||
print(f"Deleting labels for {node} nodes...")
|
||||
cmd = [python_cmd, "deploy.py", "--chart-name", chart_name, "--num-nodes", str(node), "--delete-label"]
|
||||
if current_node_names:
|
||||
cmd.extend(["--node-names"] + current_node_names)
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, check=True)
|
||||
if result.returncode != 0:
|
||||
print(f"Failed to delete labels for {node} nodes")
|
||||
except Exception as e:
|
||||
print(f"Error while deleting labels for {node} nodes: {str(e)}")
|
||||
else:
|
||||
print("Skipping cleanup for local debug. Manual cleanup may be required.")
|
||||
exit(0)
|
||||
try:
|
||||
result = subprocess.run(cmd, check=True)
|
||||
if result.returncode != 0:
|
||||
print(f"Failed to delete labels for {node} nodes")
|
||||
except Exception as e:
|
||||
print(f"Error while deleting labels for {node} nodes: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing configuration for {node} nodes: {str(e)}")
|
||||
@@ -437,9 +419,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument("yaml_file", help="Path to the YAML configuration file")
|
||||
parser.add_argument("--target-node", type=int, help="Optional: Target number of nodes to deploy.", default=None)
|
||||
parser.add_argument("--test-mode", type=str, help="Test mode, either 'oob' (out of box) or 'tune'.", default="oob")
|
||||
parser.add_argument(
|
||||
"--clean-up", type=bool, help="Clean up after test, which can be closed for local debug.", default=True
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
main(args.yaml_file, args.target_node, args.test_mode, args.clean_up)
|
||||
main(args.yaml_file, args.target_node, args.test_mode)
|
||||
|
||||
3
version.txt
Normal file
3
version.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
VERSION_MAJOR 1
|
||||
VERSION_MINOR 3
|
||||
VERSION_PATCH 0
|
||||
Reference in New Issue
Block a user