ProductivitySuite: Update TGI CPU image version to 2.4.0 (#1062)
Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export PROMPT_COLLECTION_NAME="prompt"
export RERANK_SERVER_PORT=8808
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
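For illustration only (not part of the diff), one common way to export such an address on a Linux host is sketched below; `hostname -I` and the first-interface choice are assumptions, so substitute your machine's actual IP if this does not match your setup.

```bash
# Pick the host's first routable IPv4 address instead of localhost (Linux-only sketch).
export host_ip=$(hostname -I | awk '{print $1}')
```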
@@ -26,7 +26,10 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -70,6 +73,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -111,7 +115,7 @@ services:
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
tgi_service:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
@@ -125,7 +129,7 @@ services:
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
@@ -152,11 +156,12 @@ services:
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
- reranking
- tgi_service
- embedding
- reranking
- llm
ports:
- "8888:8888"
@@ -165,14 +170,19 @@ services:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
LLM_MODEL: ${LLM_MODEL_ID}
LOGFLAG: ${LOGFLAG}
ipc: host
restart: always
tgi_service_codegen:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi_service_codegen
ports:
- "8028:80"
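As a quick sanity check after the image bump (not part of the changed files), the running TGI server reports its build metadata on its `/info` endpoint; the sketch below assumes the `9009:80` port mapping from the compose file above and that `host_ip` is already exported.

```bash
# Query the tgi-service container; the JSON response includes "version" and "model_id".
curl -s "http://${host_ip}:9009/info"
```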
@@ -22,7 +22,7 @@ function build_docker_images() {
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

@@ -74,6 +74,9 @@ function start_services() {
export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export RERANK_SERVER_PORT=8808
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
export PROMPT_COLLECTION_NAME="prompt"

# Start Docker Containers
@@ -116,6 +119,9 @@ function validate_service() {
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi
@@ -315,7 +321,7 @@ function validate_megaservice() {
# Curl the DocSum Mega Service
validate_service \
"${ip_address}:8890/v1/docsum" \
"toolkit" \
"embedding" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
'{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
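For a manual spot check outside the test harness, the DocSum request that `validate_service` issues for `docsum-xeon-backend-server` can also be sent by hand; this is a sketch that assumes the stack is up and `ip_address` points at the host, and it lets curl set the multipart header and boundary itself.

```bash
# Mirror the multipart branch of validate_service against the DocSum mega-service.
curl -s -X POST "http://${ip_address}:8890/v1/docsum" \
  -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."
```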