Set vLLM as default model for VisualQnA (#1644)
@@ -10,28 +10,6 @@ For detailed information about these instance types, you can refer to this [link

After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.

**Certain ports in the EC2 instance need to be opened up in the security group for the microservices to work with the curl commands.**

> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow.

```
llava-tgi-service
===========
Port 8399 - Open to 0.0.0.0/0

llm
===
Port 9399 - Open to 0.0.0.0/0

visualqna-xeon-backend-server
==========================
Port 8888 - Open to 0.0.0.0/0

visualqna-xeon-ui-server
=====================
Port 5173 - Open to 0.0.0.0/0
```
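If you manage the security group from the AWS CLI, rules like the following could be added for the ports above. This is only a sketch; the group ID is a placeholder, and the CIDR should be narrowed to the addresses you actually want to allow:

```bash
# Placeholder security-group ID; replace it with the group attached to your EC2 instance.
SG_ID=sg-0123456789abcdef0

# Open the example ports listed above. 0.0.0.0/0 allows traffic from anywhere;
# restrict the CIDR for anything beyond a quick test.
for port in 8399 9399 8888 5173; do
  aws ec2 authorize-security-group-ingress \
    --group-id "$SG_ID" \
    --protocol tcp \
    --port "$port" \
    --cidr 0.0.0.0/0
done
```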
## 🚀 Build Docker Images

First of all, you need to build the Docker images locally and install the required Python package.

@@ -64,19 +42,23 @@ cd GenAIExamples/VisualQnA/ui
docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
```

### 4. Pull TGI Xeon Image
### 4. Pull vLLM/TGI Xeon Image

```bash
# vLLM
docker pull opea/vllm:latest
# TGI (Optional)
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
```

Then run the command `docker images`, and you will have the following 5 Docker Images:
Then run the command `docker images`, and you will have the following Docker Images:

1. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
2. `opea/lvm:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`
5. `opea/nginx`

1. `opea/vllm:latest`
2. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu` (Optional)
3. `opea/lvm:latest`
4. `opea/visualqna:latest`
5. `opea/visualqna-ui:latest`
6. `opea/nginx`

## 🚀 Start Microservices
@@ -84,30 +66,8 @@ Then run the command `docker images`, you will have the following 5 Docker Image

Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**

> Replace External_Public_IP below with the actual IPv4 value

```
export host_ip="External_Public_IP"
```

**Append the value of the public IP address to the no_proxy list**

```
export your_no_proxy="${your_no_proxy},${host_ip}"
```

```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/visualqna"
source set_env.sh
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
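One way to look up that external address on an EC2 instance (an optional helper, not part of the original guide) is the instance metadata service:

```bash
# Request an IMDSv2 token and read the instance's public IPv4 address.
TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \
  -H "X-aws-ec2-metadata-token-ttl-seconds: 60")
export host_ip=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
  http://169.254.169.254/latest/meta-data/public-ipv4)
echo "host_ip=${host_ip}"
```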
@@ -122,6 +82,8 @@ cd GenAIExamples/VisualQnA/docker_compose/intel/cpu/xeon

```bash
docker compose -f compose.yaml up -d
# if using TGI as the LLM serving backend
docker compose -f compose_tgi.yaml up -d
```

### Validate Microservices
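As a quick smoke test once the containers are up, something like the following can be used. The health endpoint matches the compose healthcheck below; the request body assumes the gateway accepts an OpenAI-style chat payload, so check the example's validation docs for the exact schema:

```bash
# Check that the vLLM (or TGI) serving endpoint is ready.
curl -f http://${host_ip}:8399/health

# Query the VisualQnA megaservice with a text question plus an image URL
# (payload format is an assumption; adjust to the documented schema if it differs).
curl http://${host_ip}:8888/v1/visualqna \
  -H "Content-Type: application/json" \
  -d '{
        "messages": [
          {
            "role": "user",
            "content": [
              {"type": "text", "text": "What is in this image?"},
              {"type": "image_url", "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}}
            ]
          }
        ],
        "max_tokens": 128
      }'
```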
@@ -2,32 +2,31 @@
# SPDX-License-Identifier: Apache-2.0

services:
  llava-tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-llava-xeon-server
  vllm-service:
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
    container_name: vllm-service
    ports:
      - "8399:80"
      - ${VLLM_PORT:-8399}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      host_ip: ${host_ip}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 60
    command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --cuda-graphs 0
      retries: 100
    command: --model $LVM_MODEL_ID --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html

  lvm:
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    container_name: lvm-xeon-server
    depends_on:
      llava-tgi-service:
      vllm-service:
        condition: service_healthy
    ports:
      - "9399:9399"
@@ -37,7 +36,8 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
      LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
      LLM_MODEL_ID: ${LVM_MODEL_ID}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped
@@ -45,7 +45,7 @@ services:
    image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
    container_name: visualqna-xeon-backend-server
    depends_on:
      - llava-tgi-service
      - vllm-service
      - lvm
    ports:
      - "8888:8888"
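The vllm-service definition above reads a few optional variables; a minimal sketch of overriding them before `docker compose up` (the values shown are the defaults from the compose file, and the token is a placeholder):

```bash
export VLLM_PORT=8399                            # host port mapped to the vLLM container's port 80
export MODEL_CACHE=./data                        # host directory mounted as the Hugging Face model cache
export HUGGINGFACEHUB_API_TOKEN="your_hf_token"  # passed through as HF_TOKEN; only needed for models that require authentication
```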
VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml (new file, 96 lines)
@@ -0,0 +1,96 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  llava-tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-llava-xeon-server
    ports:
      - "8399:80"
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      host_ip: ${host_ip}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 60
    command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --cuda-graphs 0
  lvm:
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    container_name: lvm-xeon-server
    depends_on:
      llava-tgi-service:
        condition: service_healthy
    ports:
      - "9399:9399"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped
  visualqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
    container_name: visualqna-xeon-backend-server
    depends_on:
      - llava-tgi-service
      - lvm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  visualqna-xeon-ui-server:
    image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest}
    container_name: visualqna-xeon-ui-server
    depends_on:
      - visualqna-xeon-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always
  visualqna-xeon-nginx-server:
    image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
    container_name: visualqna-xeon-nginx-server
    depends_on:
      - visualqna-xeon-backend-server
      - visualqna-xeon-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
      - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
      - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
      - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
      - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -6,7 +6,8 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

export host_ip=$(hostname -I | awk '{print $1}')
export no_proxy=$host_ip,$no_proxy
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
@@ -15,15 +15,29 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy
docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile .
```

### 2. Pull TGI Gaudi Image
### 2. Build vLLM/Pull TGI Gaudi Image

```bash
# vLLM

# currently you have to build the opea/vllm-gaudi image locally from the habana_main branch and the specific commit;
# we will update it to a stable release tag in the future
git clone https://github.com/HabanaAI/vllm-fork.git
cd ./vllm-fork/
docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
cd ..
rm -rf vllm-fork
```

```bash
# TGI (Optional)

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
```
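Before moving on, an optional quick check that the serving image from this step is present locally:

```bash
# List the freshly built (or pulled) Gaudi serving images.
docker images | grep -E "vllm-gaudi|tgi-gaudi"
```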
### 3. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visuralqna.py` Python script. Build the MegaService Docker image using the command below:
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visualqna.py` Python script. Build the MegaService Docker image using the command below:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
@@ -43,11 +57,12 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt

Then run the command `docker images`, and you will have the following 5 Docker Images:

1. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
2. `opea/lvm:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`
5. `opea/nginx`

1. `opea/vllm-gaudi:latest`
2. `ghcr.io/huggingface/tgi-gaudi:2.0.6` (Optional)
3. `opea/lvm:latest`
4. `opea/visualqna:latest`
5. `opea/visualqna-ui:latest`
6. `opea/nginx`

## 🚀 Start MicroServices and MegaService
@@ -56,18 +71,10 @@ Then run the command `docker images`, you will have the following 5 Docker Image
Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/visualqna"
source set_env.sh
```

Note: Please replace `host_ip` with your external IP address; do **NOT** use localhost.
Note: Please replace `host_ip` with your external IP address; do not use localhost.

### Start all the services Docker Containers
@@ -77,6 +84,8 @@ cd GenAIExamples/VisualQnA/docker_compose/intel/hpu/gaudi/

```bash
docker compose -f compose.yaml up -d
# if using TGI as the LLM serving backend
docker compose -f compose_tgi.yaml up -d
```

> **_NOTE:_** Users need at least one Gaudi card to run VisualQnA successfully.
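To confirm that at least one Gaudi card is visible before bringing the stack up, the Habana driver tooling can be used (assuming the Gaudi driver stack is installed on the host):

```bash
# hl-smi ships with the Habana driver stack and lists the Gaudi devices on the host;
# at least one device should be reported here.
hl-smi
```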
@@ -2,41 +2,42 @@
# SPDX-License-Identifier: Apache-2.0

services:
  llava-tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-llava-gaudi-server
  vllm-gaudi-service:
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
    container_name: vllm-gaudi-service
    ports:
      - "8399:80"
      - ${VLLM_PORT:-8399}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
      - "./data:/root/.cache/huggingface/hub"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 60
      LLM_MODEL_ID: ${LVM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
      MAX_MODEL_LEN: ${MAX_TOTAL_TOKENS:-4096}
      MAX_SEQ_LEN_TO_CAPTURE: ${MAX_TOTAL_TOKENS:-4096}
      PT_HPUGRAPH_DISABLE_TENSOR_CACHE: false # https://github.com/HabanaAI/vllm-fork/issues/841#issuecomment-2700421704
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 150
    command: --model $LVM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html
  lvm:
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    container_name: lvm-gaudi-server
    container_name: lvm-vllm-gaudi-service
    depends_on:
      - llava-tgi-service
      vllm-gaudi-service:
        condition: service_healthy
    ports:
      - "9399:9399"
    ipc: host
@@ -45,7 +46,8 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
      LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
      LLM_MODEL_ID: ${LVM_MODEL_ID}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped
@@ -53,7 +55,7 @@ services:
    image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
    container_name: visualqna-gaudi-backend-server
    depends_on:
      - llava-tgi-service
      - vllm-gaudi-service
      - lvm
    ports:
      - "8888:8888"
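The vllm-gaudi-service above exposes a few tuning knobs through environment variables; a sketch of overriding them before starting the stack (the values shown are the compose defaults):

```bash
export VLLM_PORT=8399            # host port mapped to the vLLM container's port 80
export VLLM_SKIP_WARMUP=false    # set to true to skip HPU graph warmup (faster startup, slower first requests)
export MAX_TOTAL_TOKENS=4096     # feeds MAX_MODEL_LEN and MAX_SEQ_LEN_TO_CAPTURE in the service above
```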
VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml (new file, 105 lines)
@@ -0,0 +1,105 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  llava-tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-llava-gaudi-server
    ports:
      - "8399:80"
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 60
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
  lvm:
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    container_name: lvm-gaudi-server
    depends_on:
      llava-tgi-service:
        condition: service_healthy
    ports:
      - "9399:9399"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped
  visualqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
    container_name: visualqna-gaudi-backend-server
    depends_on:
      - llava-tgi-service
      - lvm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  visualqna-gaudi-ui-server:
    image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest}
    container_name: visualqna-gaudi-ui-server
    depends_on:
      - visualqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always
  visualqna-gaudi-nginx-server:
    image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
    container_name: visualqna-gaudi-nginx-server
    depends_on:
      - visualqna-gaudi-backend-server
      - visualqna-gaudi-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
      - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
      - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
      - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
      - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -6,7 +6,10 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export host_ip=$(hostname -I | awk '{print $1}')
export no_proxy=$host_ip,$no_proxy
# export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${host_ip}