ProductivitySuite: Update TGI CPU image version to 2.4.0 (#1062)

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
Hoong Tee, Yeoh
2024-11-08 09:50:11 +08:00
committed by GitHub
parent 4635a927fa
commit 11d8b24c8a
3 changed files with 29 additions and 10 deletions


@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export PROMPT_COLLECTION_NAME="prompt"
export RERANK_SERVER_PORT=8808
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
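The three new exports pin fixed host ports for the embedding, rerank, and LLM servers so the megaservice can address them directly. As a quick smoke test (a sketch, assuming the compose stack is already up, `host_ip` holds your external IP, and the TEI/TGI images expose their usual `/health` route):

```bash
# Sketch: probe the newly pinned server ports; assumes the stack is up
# and host_ip is set to your external IP (not localhost).
for port in "${EMBEDDING_SERVER_PORT}" "${RERANK_SERVER_PORT}" "${LLM_SERVER_PORT}"; do
  curl -sf "http://${host_ip}:${port}/health" && echo "port ${port} healthy"
done
```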


@@ -26,7 +26,10 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
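In the compose file, the dataprep service now receives `REDIS_HOST` alongside `REDIS_URL`, pointing at the vector store by its compose service name. A minimal check (a sketch, assuming it is run from the compose project directory and that the Redis image ships `redis-cli`):

```bash
# Sketch: the Redis service should answer under the hostname dataprep now
# uses, and the variable should be visible inside the dataprep container.
docker compose exec redis-vector-db redis-cli ping              # expect: PONG
docker compose exec dataprep-redis-service printenv REDIS_HOST  # expect: redis-vector-db
```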
@@ -70,6 +73,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -111,7 +115,7 @@ services:
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
tgi_service:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
@@ -125,7 +129,7 @@ services:
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
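Two changes land on `tgi_service`: the image moves to the `2.4.0-intel-cpu` build of text-generation-inference, and the launcher gains `--cuda-graphs 0`, which disables the CUDA graph capture that a CPU-only build cannot use. A standalone equivalent of the updated service (a sketch; the bind mount path is illustrative, and model/token come from your environment):

```bash
# Sketch: run the updated TGI CPU image outside compose, mirroring the
# tgi_service definition above.
docker run --rm -p 9009:80 \
  -e HF_TOKEN="${HUGGINGFACEHUB_API_TOKEN}" \
  -e HF_HUB_DISABLE_PROGRESS_BARS=1 \
  -e HF_HUB_ENABLE_HF_TRANSFER=0 \
  -v "$PWD/data:/data" \
  ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu \
  --model-id "${LLM_MODEL_ID}" --cuda-graphs 0
```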
@@ -152,11 +156,12 @@ services:
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
- reranking
- tgi_service
- embedding
- reranking
- llm
ports:
- "8888:8888"
@@ -165,14 +170,19 @@ services:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
LLM_MODEL: ${LLM_MODEL_ID}
LOGFLAG: ${LOGFLAG}
ipc: host
restart: always
tgi_service_codegen:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi_service_codegen
ports:
- "8028:80"


@@ -22,7 +22,7 @@ function build_docker_images() {
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}
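Since `build_docker_images` pulls the upstream TGI image rather than building it, the tag here has to stay in sync with the compose file. A quick guard (a sketch) that the updated tag is actually present before the tests run:

```bash
# Sketch: fail early if the updated TGI tag never made it onto the host.
docker image inspect ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu \
  --format 'TGI image present: {{.Id}}' || exit 1
```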
@@ -74,6 +74,9 @@ function start_services() {
export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export RERANK_SERVER_PORT=8808
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
export PROMPT_COLLECTION_NAME="prompt"
# Start Docker Containers
@@ -116,6 +119,9 @@ function validate_service() {
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi
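`validate_service` packs body and status into one string via `--write-out "HTTPSTATUS:%{http_code}"`. Test scripts in this suite typically split that string as below (a sketch reusing the same variable names):

```bash
# Sketch: separate the HTTP status from the body captured above.
HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS:.*//g')
if [ "$HTTP_STATUS" -ne "200" ]; then
    echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
fi
```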
@@ -315,7 +321,7 @@ function validate_megaservice() {
# Curl the DocSum Mega Service
validate_service \
"${ip_address}:8890/v1/docsum" \
"toolkit" \
"embedding" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
'{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'