ProductivitySuite: Update TGI CPU image version to 2.4.0 (#1062)
Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export PROMPT_COLLECTION_NAME="prompt"
export RERANK_SERVER_PORT=8808
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
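For illustration only (not part of the diff), one common way to export such an address on a Linux host is sketched below; `hostname -I` and the first-interface choice are assumptions, so substitute your machine's actual IP if this does not match your setup.

```bash
# Pick the host's first routable IPv4 address instead of localhost (Linux-only sketch).
export host_ip=$(hostname -I | awk '{print $1}')
```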
@@ -26,7 +26,10 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -70,6 +73,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -111,7 +115,7 @@ services:
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
tgi_service:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
@@ -125,7 +129,7 @@ services:
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
@@ -152,11 +156,12 @@ services:
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
- reranking
- tgi_service
- embedding
- reranking
- llm
ports:
- "8888:8888"
@@ -165,14 +170,19 @@ services:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
LLM_MODEL: ${LLM_MODEL_ID}
LOGFLAG: ${LOGFLAG}
ipc: host
restart: always
tgi_service_codegen:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi_service_codegen
ports:
- "8028:80"
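As a quick sanity check after the image bump (not part of the changed files), the running TGI server reports its build metadata on its `/info` endpoint; the sketch below assumes the `9009:80` port mapping from the compose file above and that `host_ip` is already exported.

```bash
# Query the tgi-service container; the JSON response includes "version" and "model_id".
curl -s "http://${host_ip}:9009/info"
```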
@@ -22,7 +22,7 @@ function build_docker_images() {
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

@@ -74,6 +74,9 @@ function start_services() {
export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export RERANK_SERVER_PORT=8808
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
export PROMPT_COLLECTION_NAME="prompt"

# Start Docker Containers
@@ -116,6 +119,9 @@ function validate_service() {
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi
@@ -315,7 +321,7 @@ function validate_megaservice() {
# Curl the DocSum Mega Service
validate_service \
"${ip_address}:8890/v1/docsum" \
"toolkit" \
"embedding" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
'{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
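For a manual spot check outside the test harness, the DocSum request that `validate_service` issues for `docsum-xeon-backend-server` can also be sent by hand; this is a sketch that assumes the stack is up and `ip_address` points at the host, and it lets curl set the multipart header and boundary itself.

```bash
# Mirror the multipart branch of validate_service against the DocSum mega-service.
curl -s -X POST "http://${ip_address}:8890/v1/docsum" \
  -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."
```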