Refactor docsum (#1336)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac
 ```bash
 git clone https://github.com/opea-project/GenAIComps.git
 cd GenAIComps
-docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile .
+docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
 ```

 Then run the command `docker images`, you will have the following four Docker Images:
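
A quick way to confirm the rebuilt image exists locally (a minimal sketch; the image name assumes the `docker build` command shown just above):

```bash
# List the freshly built DocSum LLM image by name.
docker images | grep llm-docsum
```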
@@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export DOCSUM_LLM_SERVER_PORT="8008"
 export DOCSUM_BACKEND_SERVER_PORT="8888"
 export DOCSUM_FRONTEND_PORT="5173"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
 ```

 Note: Please replace with `host_ip` with your external IP address, do not use localhost.
@@ -126,7 +127,7 @@ docker compose up -d
 2. LLM Microservice

 ```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
 -H 'Content-Type: application/json'
@@ -13,6 +13,8 @@ services:
       https_proxy: ${https_proxy}
       TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
       HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+      DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
     volumes:
       - "/var/opea/docsum-service/data:/data"
     shm_size: 1g
@@ -27,13 +29,19 @@ services:
     security_opt:
       - seccomp:unconfined
     ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

   docsum-llm-server:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: docsum-llm-server
     depends_on:
-      - docsum-tgi-service
+      docsum-tgi-service:
+        condition: service_healthy
     ports:
       - "${DOCSUM_LLM_SERVER_PORT}:9000"
     ipc: host
@@ -51,11 +59,13 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
+      LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
       HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:
@@ -123,7 +123,7 @@ You will have the following Docker Images:

 1. `opea/docsum-ui:latest`
 2. `opea/docsum:latest`
-3. `opea/llm-docsum-tgi:latest`
+3. `opea/llm-docsum:latest`
 4. `opea/whisper:latest`

 ### Validate Microservices
@@ -140,7 +140,7 @@ You will have the following Docker Images:
 2. LLM Microservice

 ```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
 -H 'Content-Type: application/json'
@@ -6,36 +6,45 @@ services:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-server
     ports:
-      - "8008:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     volumes:
       - "./data:/data"
     shm_size: 1g
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

   llm-docsum-tgi:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: llm-docsum-server
     depends_on:
-      - tgi-server
+      tgi-server:
+        condition: service_healthy
     ports:
-      - "9000:9000"
+      - ${DOCSUM_PORT:-9000}:9000
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      LOGFLAG: True
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:
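
The healthcheck added above polls TGI's `/health` route. A rough manual probe of the same endpoint (assuming `host_ip` and `LLM_ENDPOINT_PORT` are exported as in the accompanying environment script) looks like:

```bash
# A successful response here is what allows dependent services declared with
# `condition: service_healthy` to start.
curl -f "http://${host_ip}:${LLM_ENDPOINT_PORT:-8008}/health" && echo "TGI is ready"
```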
@@ -115,7 +115,7 @@ You will have the following Docker Images:

 1. `opea/docsum-ui:latest`
 2. `opea/docsum:latest`
-3. `opea/llm-docsum-tgi:latest`
+3. `opea/llm-docsum:latest`
 4. `opea/whisper:latest`

 ### Validate Microservices
@@ -132,7 +132,7 @@ You will have the following Docker Images:
 2. LLM Microservice

 ```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
 -H 'Content-Type: application/json'
@@ -2,47 +2,59 @@
 # SPDX-License-Identifier: Apache-2.0

 services:
-  tgi-server:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+  tgi-gaudi-server:
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
-      - "8008:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
+    volumes:
+      - "./data:/data"
     environment:
-      HABANA_VISIBLE_DEVICES: all
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
       ENABLE_HPU_GRAPH: true
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
-    volumes:
-      - "./data:/data"
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
     runtime: habana
     cap_add:
       - SYS_NICE
     ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

   llm-docsum-tgi:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: llm-docsum-gaudi-server
     depends_on:
-      - tgi-server
+      tgi-gaudi-server:
+        condition: service_healthy
     ports:
-      - "9000:9000"
+      - ${DOCSUM_PORT:-9000}:9000
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      LOGFLAG: True
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:
@@ -66,7 +78,7 @@ services:
     image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
     container_name: docsum-gaudi-backend-server
     depends_on:
-      - tgi-server
+      - tgi-gaudi-server
       - llm-docsum-tgi
     ports:
       - "8888:8888"
@@ -10,10 +10,14 @@ export MAX_INPUT_TOKENS=1024
 export MAX_TOTAL_TOKENS=2048

 export no_proxy="${no_proxy},${host_ip}"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
+
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
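
A minimal usage sketch for the new variables (assuming the usual flow of sourcing this environment script from the compose directory before bringing up the stack; adjust the path to your deployment layout):

```bash
# Hypothetical example: load the updated environment, then start the services.
source ./set_env.sh
docker compose up -d
# The llm-docsum container now reaches TGI through LLM_ENDPOINT
# instead of the removed TGI_LLM_ENDPOINT.
```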
@@ -41,9 +41,9 @@ services:
       dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
     extends: docsum
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
-  llm-docsum-tgi:
+  llm-docsum:
     build:
       context: GenAIComps
-      dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
+      dockerfile: comps/llms/src/doc-summarization/Dockerfile
     extends: docsum
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
@@ -146,7 +146,7 @@ class DocSumService:
             name="llm",
             host=LLM_SERVICE_HOST_IP,
             port=LLM_SERVICE_PORT,
-            endpoint="/v1/chat/docsum",
+            endpoint="/v1/docsum",
             use_remote_service=True,
             service_type=ServiceType.LLM,
         )
@@ -23,7 +23,7 @@ spec:
       internalService:
         serviceName: docsum-llm-uservice
         config:
-          endpoint: /v1/chat/docsum
+          endpoint: /v1/docsum
           PORT: "9009"
           TGI_LLM_ENDPOINT: tgi-gaudi-svc
     - name: TgiGaudi
@@ -23,7 +23,7 @@ spec:
       internalService:
         serviceName: docsum-llm-uservice
         config:
-          endpoint: /v1/chat/docsum
+          endpoint: /v1/docsum
           PORT: "9009"
           TGI_LLM_ENDPOINT: tgi-svc
     - name: Tgi
@@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG}
 export MAX_INPUT_TOKENS=2048
 export MAX_TOTAL_TOKENS=4096
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
 export no_proxy="${no_proxy},${host_ip}"
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
@@ -37,10 +41,10 @@ function build_docker_images() {
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+    service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }

@@ -49,15 +53,6 @@ function start_services() {

     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
-
-    until [[ "$n" -ge 100 ]]; do
-        docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log
-        if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
-            break
-        fi
-        sleep 5s
-        n=$((n+1))
-    done
 }

 get_base64_str() {
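
The log-polling loop removed above is superseded by the compose healthcheck together with `depends_on: condition: service_healthy`. If a test script still needs to wait for readiness explicitly, a rough equivalent (assuming the `tgi-gaudi-server` container name used in this compose file) would be:

```bash
# Poll Docker's own health status instead of grepping container logs.
until [[ "$(docker inspect --format '{{.State.Health.Status}}' tgi-gaudi-server)" == "healthy" ]]; do
    sleep 5s
done
```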
@@ -156,13 +151,13 @@ function validate_microservices() {
     validate_services_json \
         "${host_ip}:8008/generate" \
         "generated_text" \
-        "tgi-gaudi" \
+        "tgi-gaudi-server" \
         "tgi-gaudi-server" \
         '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

     # llm microservice
     validate_services_json \
-        "${host_ip}:9000/v1/chat/docsum" \
+        "${host_ip}:9000/v1/docsum" \
         "data: " \
         "llm-docsum-tgi" \
         "llm-docsum-gaudi-server" \
@@ -22,7 +22,6 @@ export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HOST_IP=${ip_address}
 export host_ip=${ip_address}
 export DOCSUM_TGI_SERVICE_PORT="8008"
-export DOCSUM_TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export DOCSUM_LLM_SERVER_PORT="9000"
 export DOCSUM_BACKEND_SERVER_PORT="8888"
@@ -33,13 +32,15 @@ export ASR_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
 export DOCSUM_CARD_ID="card1"
 export DOCSUM_RENDER_ID="renderD136"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

 function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+    service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-generation-inference:1.4
@@ -52,15 +53,7 @@ function start_services() {

     # Start Docker Containers
     docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
+    sleep 3m
-    until [[ "$n" -ge 100 ]]; do
-        docker logs docsum-tgi-service > "${LOG_PATH}"/tgi_service_start.log
-        if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then
-            break
-        fi
-        sleep 5s
-        n=$((n+1))
-    done
 }

 function validate_services() {
@@ -144,7 +137,7 @@ function validate_microservices() {

     # llm microservice
     validate_services \
-        "${host_ip}:9000/v1/chat/docsum" \
+        "${host_ip}:9000/v1/docsum" \
         "data: " \
         "docsum-llm-server" \
         "docsum-llm-server" \
@@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG}
 export MAX_INPUT_TOKENS=2048
 export MAX_TOTAL_TOKENS=4096
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
 export no_proxy="${no_proxy},${host_ip}"
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
@@ -36,7 +40,7 @@ function build_docker_images() {
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+    service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-generation-inference:1.4
@@ -48,15 +52,6 @@ function start_services() {

     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
-
-    until [[ "$n" -ge 100 ]]; do
-        docker logs tgi-server > ${LOG_PATH}/tgi_service_start.log
-        if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
-            break
-        fi
-        sleep 5s
-        n=$((n+1))
-    done
 }

 get_base64_str() {
@@ -158,13 +153,13 @@ function validate_microservices() {
     validate_services_json \
         "${host_ip}:8008/generate" \
         "generated_text" \
-        "tgi" \
+        "tgi-server" \
         "tgi-server" \
         '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

     # llm microservice
     validate_services_json \
-        "${host_ip}:9000/v1/chat/docsum" \
+        "${host_ip}:9000/v1/docsum" \
         "data: " \
         "llm-docsum-tgi" \
         "llm-docsum-server" \
@@ -293,7 +293,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det
 10. DocSum LLM Microservice

 ```bash
-curl http://${host_ip}:9003/v1/chat/docsum\
+curl http://${host_ip}:9003/v1/docsum\
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \
 -H 'Content-Type: application/json'
@@ -68,7 +68,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
 | [opea/guardrails]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use |
 | [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
 | [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
-| [opea/llm-docsum-tgi](https://hub.docker.com/r/opea/llm-docsum-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/summarization/tgi/langchain/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
+| [opea/llm-docsum]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
 | [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
 | [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon TGI docker image for GenAI application use |
 | [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |