Refactor docsum (#1336)

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Author: XinyaoWa
Date: 2025-01-13 15:49:48 +08:00
Committed by: GitHub
Parent: ca15fe9bdb
Commit: ff1310b11a
16 changed files with 94 additions and 75 deletions

@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
-docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile .
+docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
```
Then run the command `docker images`; you will see the following four Docker images:
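
To confirm the rebuild picked up the new Dockerfile path, a quick check (a sketch, assuming the `opea/llm-docsum-tgi:latest` tag used in the build command above):

```bash
# The freshly built image should appear with a recent CREATED timestamp
docker images | grep llm-docsum-tgi
```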
@@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export DOCSUM_LLM_SERVER_PORT="8008"
export DOCSUM_BACKEND_SERVER_PORT="8888"
export DOCSUM_FRONTEND_PORT="5173"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
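
One way to populate `host_ip` with an external address rather than localhost (a minimal sketch; pick the interface that is reachable from your clients):

```bash
# Take the first non-loopback IPv4 address reported by the host
export host_ip=$(hostname -I | awk '{print $1}')
echo "host_ip=${host_ip}"
```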
@@ -126,7 +127,7 @@ docker compose up -d
2. LLM Microservice
```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
  -X POST \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
  -H 'Content-Type: application/json'
```
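
Since the route moves from `/v1/chat/docsum` to `/v1/docsum`, a status-code probe confirms the new path is the one being served (a sketch, assuming the port mapping above):

```bash
# Expect 200 from the new route once docsum-llm-server is up
curl -s -o /dev/null -w "%{http_code}\n" \
  -X POST http://${host_ip}:9000/v1/docsum \
  -H 'Content-Type: application/json' \
  -d '{"query":"ping"}'
```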

@@ -13,6 +13,8 @@ services:
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+     host_ip: ${host_ip}
+     DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
    volumes:
      - "/var/opea/docsum-service/data:/data"
    shm_size: 1g
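
The two variables added here feed the healthcheck introduced in the next hunk, so they must be set in the shell that runs `docker compose up`. A minimal sanity check (a sketch, not part of the change):

```bash
# Empty output means the variables are unset and the healthcheck URL would be malformed
printenv | grep -E '^(host_ip|DOCSUM_TGI_SERVICE_PORT)='
```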
@@ -27,13 +29,19 @@ services:
    security_opt:
      - seccomp:unconfined
    ipc: host
+   healthcheck:
+     test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"]
+     interval: 10s
+     timeout: 10s
+     retries: 100
    command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
  docsum-llm-server:
-   image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+   image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
    container_name: docsum-llm-server
    depends_on:
-     - docsum-tgi-service
+     docsum-tgi-service:
+       condition: service_healthy
    ports:
      - "${DOCSUM_LLM_SERVER_PORT}:9000"
    ipc: host
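
With `condition: service_healthy`, `docsum-llm-server` now waits until TGI passes the healthcheck above instead of starting as soon as the container is created. The same probe can be run by hand while the model loads (assuming the ports exported earlier):

```bash
# Mirrors the compose healthcheck; succeeds once TGI has loaded the model
curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health
# Compose reports the transition from "starting" to "healthy"
docker compose ps docsum-tgi-service
```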
@@ -51,11 +59,13 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-     TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
+     LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+     LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+     DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped
  whisper:
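
To verify the renamed and newly added variables actually reach the microservice, the rendered compose file and the live container environment can both be inspected (a sketch, assuming the `docsum-llm-server` container name and the compose directory as working directory):

```bash
# Show the fully interpolated configuration compose will use
docker compose config | grep -E 'LLM_ENDPOINT|LLM_MODEL_ID|DocSum_COMPONENT_NAME'
# Confirm the variables inside the running container
docker exec docsum-llm-server env | grep -E '^(LLM_|DocSum_)'
```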