Refactor docsum (#1336)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
-docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile .
+docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
```

Then run the command `docker images`, you will have the following four Docker Images:
@@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export DOCSUM_LLM_SERVER_PORT="8008"
export DOCSUM_BACKEND_SERVER_PORT="8888"
export DOCSUM_FRONTEND_PORT="5173"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
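The newly introduced `DocSum_COMPONENT_NAME` selects the TGI-backed summarization backend and has to be present in the shell that launches the stack. A minimal sketch, assuming the other variables above are already exported and the compose file shown later in this commit is used:

```bash
# Sketch: export the new selector, start the stack, and confirm the variable
# actually reached the LLM wrapper container (container name taken from the
# compose file further down in this commit).
export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
docker compose up -d
docker exec docsum-llm-server printenv DocSum_COMPONENT_NAME
```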
@@ -126,7 +127,7 @@ docker compose up -d
2. LLM Microservice

```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
  -X POST \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
  -H 'Content-Type: application/json'
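The renamed route streams its output; the CI scripts later in this commit simply grep the response for a `data: ` prefix. A hedged spot-check along the same lines (the payload text is only illustrative):

```bash
# Sketch: a healthy /v1/docsum response is streamed as "data: ..." chunks,
# which is exactly what the test scripts below assert on.
curl -sN http://${host_ip}:9000/v1/docsum \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query":"OPEA DocSum turns long documents into short summaries."}' |
  grep -m1 "data: "
```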
@@ -13,6 +13,8 @@ services:
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+     host_ip: ${host_ip}
+     DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
    volumes:
      - "/var/opea/docsum-service/data:/data"
    shm_size: 1g
@@ -27,13 +29,19 @@ services:
    security_opt:
      - seccomp:unconfined
    ipc: host
+   healthcheck:
+     test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"]
+     interval: 10s
+     timeout: 10s
+     retries: 100
    command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

  docsum-llm-server:
-   image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+   image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
    container_name: docsum-llm-server
    depends_on:
-     - docsum-tgi-service
+     docsum-tgi-service:
+       condition: service_healthy
    ports:
      - "${DOCSUM_LLM_SERVER_PORT}:9000"
    ipc: host
@@ -51,11 +59,13 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-     TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
+     LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+     DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+     LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

  whisper:
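With the added healthcheck and the `condition: service_healthy` dependency, compose now delays the LLM wrapper until TGI answers on `/health`. A hedged way to watch that ordering (service and container names taken from the compose above):

```bash
# Sketch: the TGI container should report "healthy" before docsum-llm-server starts.
docker inspect --format '{{.State.Health.Status}}' docsum-tgi-service
curl -f "http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health" && echo "TGI is ready"
docker ps --filter name=docsum-llm-server --format '{{.Names}}: {{.Status}}'
```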
@@ -123,7 +123,7 @@ You will have the following Docker Images:

1. `opea/docsum-ui:latest`
2. `opea/docsum:latest`
-3. `opea/llm-docsum-tgi:latest`
+3. `opea/llm-docsum:latest`
4. `opea/whisper:latest`

### Validate Microservices
@@ -140,7 +140,7 @@ You will have the following Docker Images:
2. LLM Microservice

```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
  -X POST \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
  -H 'Content-Type: application/json'
@@ -6,36 +6,45 @@ services:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-server
    ports:
-     - "8008:80"
+     - ${LLM_ENDPOINT_PORT:-8008}:80
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+     host_ip: ${host_ip}
+     LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+   healthcheck:
+     test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+     interval: 10s
+     timeout: 10s
+     retries: 100
    volumes:
      - "./data:/data"
    shm_size: 1g
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

  llm-docsum-tgi:
-   image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+   image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
    container_name: llm-docsum-server
    depends_on:
-     - tgi-server
+     tgi-server:
+       condition: service_healthy
    ports:
-     - "9000:9000"
+     - ${DOCSUM_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-     TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+     LLM_ENDPOINT: ${LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-     LOGFLAG: True
+     DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+     LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

  whisper:
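Because both host ports are now parameterized with shell defaults, they can be remapped without editing the compose file. A small sketch with illustrative values (8010 and 9100 are examples, not part of this commit):

```bash
# Sketch: run TGI and the DocSum LLM wrapper on non-default host ports.
export LLM_ENDPOINT_PORT=8010   # example override; falls back to 8008
export DOCSUM_PORT=9100         # example override; falls back to 9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
docker compose -f compose.yaml up -d
```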
@@ -115,7 +115,7 @@ You will have the following Docker Images:

1. `opea/docsum-ui:latest`
2. `opea/docsum:latest`
-3. `opea/llm-docsum-tgi:latest`
+3. `opea/llm-docsum:latest`
4. `opea/whisper:latest`

### Validate Microservices
@@ -132,7 +132,7 @@ You will have the following Docker Images:
2. LLM Microservice

```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
  -X POST \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
  -H 'Content-Type: application/json'
@@ -2,47 +2,59 @@
# SPDX-License-Identifier: Apache-2.0

services:
- tgi-server:
-   image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+ tgi-gaudi-server:
+   image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-server
    ports:
-     - "8008:80"
+     - ${LLM_ENDPOINT_PORT:-8008}:80
+   volumes:
+     - "./data:/data"
    environment:
-     HABANA_VISIBLE_DEVICES: all
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
+     HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
-   volumes:
-     - "./data:/data"
+     host_ip: ${host_ip}
+     LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
+   healthcheck:
+     test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+     interval: 10s
+     timeout: 10s
+     retries: 100
    command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

  llm-docsum-tgi:
-   image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+   image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
    container_name: llm-docsum-gaudi-server
    depends_on:
-     - tgi-server
+     tgi-gaudi-server:
+       condition: service_healthy
    ports:
-     - "9000:9000"
+     - ${DOCSUM_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-     TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+     LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-     LOGFLAG: True
+     DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+     LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

  whisper:
@@ -66,7 +78,7 @@ services:
    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
    container_name: docsum-gaudi-backend-server
    depends_on:
-     - tgi-server
+     - tgi-gaudi-server
      - llm-docsum-tgi
    ports:
      - "8888:8888"
||||
@@ -10,10 +10,14 @@ export MAX_INPUT_TOKENS=1024
|
||||
export MAX_TOTAL_TOKENS=2048
|
||||
|
||||
export no_proxy="${no_proxy},${host_ip}"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
||||
|
||||
export LLM_ENDPOINT_PORT=8008
|
||||
export DOCSUM_PORT=9000
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
|
||||
|
||||
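A quick, hedged sanity check after loading the updated environment (the file above is assumed to be the example's set_env.sh, sourced from its own directory):

```bash
# Sketch: load the variables and confirm the values derived from them.
source ./set_env.sh
echo "${LLM_ENDPOINT}"           # expected: http://<host_ip>:8008
echo "${DocSum_COMPONENT_NAME}"  # expected: OPEADocSum_TGI
```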
@@ -41,9 +41,9 @@ services:
      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
    extends: docsum
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
- llm-docsum-tgi:
+ llm-docsum:
    build:
      context: GenAIComps
-     dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
+     dockerfile: comps/llms/src/doc-summarization/Dockerfile
    extends: docsum
-   image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+   image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
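With the build service renamed, only the new name is valid when rebuilding the summarization image on its own; a minimal sketch (the flags mirror the build commands used in the test scripts below):

```bash
# Sketch: rebuild just the renamed llm-docsum image from build.yaml.
docker compose -f build.yaml build llm-docsum --no-cache
docker images | grep llm-docsum
```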
@@ -146,7 +146,7 @@ class DocSumService:
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
-           endpoint="/v1/chat/docsum",
+           endpoint="/v1/docsum",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
@@ -23,7 +23,7 @@ spec:
    internalService:
      serviceName: docsum-llm-uservice
      config:
-       endpoint: /v1/chat/docsum
+       endpoint: /v1/docsum
        PORT: "9009"
        TGI_LLM_ENDPOINT: tgi-gaudi-svc
  - name: TgiGaudi
@@ -23,7 +23,7 @@ spec:
    internalService:
      serviceName: docsum-llm-uservice
      config:
-       endpoint: /v1/chat/docsum
+       endpoint: /v1/docsum
        PORT: "9009"
        TGI_LLM_ENDPOINT: tgi-svc
  - name: Tgi
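A hedged way to confirm a deployed pipeline picked up the new route (generic kubectl commands; the service name comes from the manifests above, and the forwarded port should be adjusted to whatever `kubectl get svc` reports):

```bash
# Sketch: port-forward the LLM internal service and hit the renamed route.
kubectl get svc docsum-llm-uservice
kubectl port-forward svc/docsum-llm-uservice 9000:9000 &
curl http://localhost:9000/v1/docsum \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query":"Summarize this sentence."}'
```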
@@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG}
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
export no_proxy="${no_proxy},${host_ip}"
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -37,10 +41,10 @@ function build_docker_images() {
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-   service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+   service_list="docsum docsum-gradio-ui whisper llm-docsum"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-   docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+   docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
    docker images && sleep 1s
}

@@ -49,15 +53,6 @@ function start_services() {

    docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    sleep 3m
-
-   until [[ "$n" -ge 100 ]]; do
-       docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log
-       if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
-           break
-       fi
-       sleep 5s
-       n=$((n+1))
-   done
}

get_base64_str() {
@@ -156,13 +151,13 @@ function validate_microservices() {
    validate_services_json \
        "${host_ip}:8008/generate" \
        "generated_text" \
-       "tgi-gaudi" \
+       "tgi-gaudi-server" \
        "tgi-gaudi-server" \
        '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

    # llm microservice
    validate_services_json \
-       "${host_ip}:9000/v1/chat/docsum" \
+       "${host_ip}:9000/v1/docsum" \
        "data: " \
-       "llm-docsum-tgi" \
+       "llm-docsum-gaudi-server" \
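The `validate_services_json` helper itself is defined earlier in these test scripts and is not part of this diff; a minimal hedged sketch of what such a helper does, matching the argument order used above (URL, expected substring, service label, container name, JSON payload):

```bash
# Sketch only: not the project's actual helper, just the shape implied by its call sites.
validate_services_json() {
    local url="$1" expected="$2" service_name="$3" container_name="$4" payload="$5"
    local response
    response=$(curl -s -X POST -H 'Content-Type: application/json' -d "$payload" "$url")
    if echo "$response" | grep -q "$expected"; then
        echo "[ $service_name ] content is as expected."
    else
        echo "[ $service_name ] unexpected content, dumping container logs."
        docker logs "$container_name" >> "${LOG_PATH}/${service_name}.log"
        exit 1
    fi
}
```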
@@ -22,7 +22,6 @@ export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HOST_IP=${ip_address}
export host_ip=${ip_address}
export DOCSUM_TGI_SERVICE_PORT="8008"
-export DOCSUM_TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DOCSUM_LLM_SERVER_PORT="9000"
export DOCSUM_BACKEND_SERVER_PORT="8888"
@@ -33,13 +32,15 @@ export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
export DOCSUM_CARD_ID="card1"
export DOCSUM_RENDER_ID="renderD136"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

function build_docker_images() {
    cd $WORKPATH/docker_image_build
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-   service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+   service_list="docsum docsum-gradio-ui whisper llm-docsum"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/text-generation-inference:1.4
@@ -52,15 +53,7 @@ function start_services() {

    # Start Docker Containers
    docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
-
-   until [[ "$n" -ge 100 ]]; do
-       docker logs docsum-tgi-service > "${LOG_PATH}"/tgi_service_start.log
-       if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then
-           break
-       fi
-       sleep 5s
-       n=$((n+1))
-   done
    sleep 3m
}

function validate_services() {
@@ -144,7 +137,7 @@ function validate_microservices() {

    # llm microservice
    validate_services \
-       "${host_ip}:9000/v1/chat/docsum" \
+       "${host_ip}:9000/v1/docsum" \
        "data: " \
        "docsum-llm-server" \
        "docsum-llm-server" \
@@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG}
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
export no_proxy="${no_proxy},${host_ip}"
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -36,7 +40,7 @@ function build_docker_images() {
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-   service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+   service_list="docsum docsum-gradio-ui whisper llm-docsum"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/text-generation-inference:1.4
@@ -48,15 +52,6 @@ function start_services() {

    docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    sleep 3m
-
-   until [[ "$n" -ge 100 ]]; do
-       docker logs tgi-server > ${LOG_PATH}/tgi_service_start.log
-       if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
-           break
-       fi
-       sleep 5s
-       n=$((n+1))
-   done
}

get_base64_str() {
@@ -158,13 +153,13 @@ function validate_microservices() {
    validate_services_json \
        "${host_ip}:8008/generate" \
        "generated_text" \
-       "tgi" \
+       "tgi-server" \
        "tgi-server" \
        '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

    # llm microservice
    validate_services_json \
-       "${host_ip}:9000/v1/chat/docsum" \
+       "${host_ip}:9000/v1/docsum" \
        "data: " \
-       "llm-docsum-tgi" \
+       "llm-docsum-server" \
@@ -293,7 +293,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det
10. DocSum LLM Microservice

```bash
-curl http://${host_ip}:9003/v1/chat/docsum\
+curl http://${host_ip}:9003/v1/docsum\
  -X POST \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \
  -H 'Content-Type: application/json'
@@ -68,7 +68,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
| [opea/guardrails]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use |
| [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
| [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
-| [opea/llm-docsum-tgi](https://hub.docker.com/r/opea/llm-docsum-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/summarization/tgi/langchain/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
+| [opea/llm-docsum]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
| [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon TGI docker image for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
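The new image entry has no Docker Hub link yet, so building it locally from the linked Dockerfile is the safe path; a minimal sketch (it mirrors the build steps earlier in this commit):

```bash
# Sketch: build the renamed summarization image straight from GenAIComps.
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/llm-docsum:latest \
  --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
  -f comps/llms/src/doc-summarization/Dockerfile .
```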