Fix ChatQnA microservice issues on Xeon (#125)

Signed-off-by: letonghan <letong.han@intel.com>
Letong Han
2024-05-13 15:38:58 +08:00
committed by GitHub
parent 355d0b64de
commit 4885e6d8c7
3 changed files with 32 additions and 47 deletions


@@ -24,7 +24,7 @@ pip install .
### 1. Build Embedding Image
```bash
docker build -t opea/gen-ai-comps:embedding-tei-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/docker/Dockerfile .
docker build -t opea/gen-ai-comps:embedding-tei-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```
### 2. Build Retriever Image
@@ -45,19 +45,12 @@ docker build -t opea/gen-ai-comps:reranking-tei-xeon-server --build-arg https_pr
docker build -t opea/gen-ai-comps:llm-tgi-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/langchain/docker/Dockerfile .
```
### 5. Pull qna-rag-redis-server Image
```bash
docker pull intel/gen-ai-examples:qna-rag-redis-server
```
Then run the command `docker images`; you should see the following four Docker images (a quick check is sketched after the list):
1. `opea/gen-ai-comps:embedding-tei-server`
2. `opea/gen-ai-comps:retriever-redis-server`
3. `opea/gen-ai-comps:reranking-tei-xeon-server`
4. `opea/gen-ai-comps:llm-tgi-server`
5. `intel/gen-ai-examples:qna-rag-redis-server`
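As a quick sanity check, the built images can be listed by repository name; a minimal sketch, assuming the default image names above, is:
```bash
# Lists the opea/gen-ai-comps images built in the steps above
docker images opea/gen-ai-comps
```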
## 🚀 Start Microservices
@@ -70,17 +63,15 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-large"
export LLM_MODEL_ID="m-a-p/OpenCodeInterpreter-DS-6.7B"
export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${your_ip}:6060"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808"
export TGI_LLM_ENDPOINT="http://${your_ip}:9009"
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```
Note: Please replace `your_ip` with your external IP address; do not use localhost.
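If you are unsure of the host's external IP, one convenience sketch for capturing it before running the exports above (an assumption: a Linux host where `hostname -I` is available) is:
```bash
# Pick the first non-loopback IPv4 address reported by hostname -I (Linux only)
export your_ip=$(hostname -I | awk '{print $1}')
echo "Using host IP: ${your_ip}"
```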
### Start Microservice Docker Containers
```bash
@@ -92,7 +83,7 @@ docker compose -f docker_compose.yaml up -d
1. TEI Embedding Service
```bash
curl ${your_ip}:8090/embed \
curl ${your_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
@@ -108,18 +99,27 @@ curl http://${your_ip}:6000/v1/embeddings\
```
3. Retriever Microservice
To validate the retriever microservice, first generate a mock embedding vector of length 768 with a short Python script:
```Python
import random
embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```
Then substitute your mock embedding vector for `${your_embedding}` in the following cURL command:
```bash
curl http://${your_ip}:7000/v1/retrieval\
-X POST \
-d '{"text":"test","embedding":[1,1,...1]}' \
-d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}' \
-H 'Content-Type: application/json'
```
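As an alternative to pasting the vector by hand, a minimal sketch that generates the mock embedding and sends the request in one step (assuming `python3` is available on the host) is:
```bash
# Generate a 768-dimensional mock embedding and capture it in a shell variable
your_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")

# Send it to the retriever microservice; double quotes let ${your_embedding} expand
curl http://${your_ip}:7000/v1/retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```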
4. TEI Reranking Service
```bash
curl http://${your_ip}:6060/rerank \
curl http://${your_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
@@ -137,7 +137,7 @@ curl http://${your_ip}:8000/v1/reranking\
6. TGI Service
```bash
curl http://${your_ip}:8008/generate \
curl http://${your_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
@@ -152,6 +152,8 @@ curl http://${your_ip}:9000/v1/chat/completions\
-H 'Content-Type: application/json'
```
Once all of the microservices above have been validated, we are ready to construct the mega-service.
## 🚀 Construct Mega Service
Modify the `initial_inputs` on line 34 of `chatqna.py`, then run the script to get the ChatQnA result from this mega-service (a minimal launch sketch is shown below).
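A minimal launch sketch, assuming the mega-service is started directly with `python` and the environment variables exported earlier are still set:
```bash
# Run the mega-service orchestrator; it schedules the pipeline with the modified initial_inputs
python chatqna.py
```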


@@ -31,7 +31,7 @@ class MyServiceOrchestrator:
self.service_builder.flow_to(rerank, llm)
def schedule(self):
self.service_builder.schedule(initial_inputs={"text": "What is the revenue of Nike?"})
self.service_builder.schedule(initial_inputs={"text": "What is the revenue of Nike in 2023?"})
self.service_builder.get_all_final_outputs()
result_dict = self.service_builder.result_dict
print(result_dict)


@@ -21,30 +21,11 @@ services:
ports:
- "6379:6379"
- "8001:8001"
qna-rag-redis-server:
image: intel/gen-ai-examples:qna-rag-redis-server
container_name: qna-rag-redis-server
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
REDIS_PORT: 6379
EMBED_MODEL: BAAI/bge-base-en-v1.5
REDIS_SCHEMA: schema_dim_768.yml
VECTOR_DATABASE: REDIS
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)
hard: -1
volumes:
- ../redis:/ws
- ../test:/test
network_mode: "host"
tei_embedding_service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei_embedding_server
container_name: tei-embedding-server
ports:
- "8090:80"
- "6006:80"
volumes:
- "./data:/data"
shm_size: 1g
@@ -53,18 +34,19 @@ services:
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: intel/gen-ai-comps:embedding-tei-server
image: opea/gen-ai-comps:embedding-tei-server
container_name: embedding-tei-server
ports:
- "6000:6000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: intel/gen-ai-comps:retriever-redis-server
image: opea/gen-ai-comps:retriever-redis-server
container_name: retriever-redis-server
ports:
- "7000:7000"
@@ -74,10 +56,11 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei_xeon_service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei_xeon_server
container_name: tei-xeon-server
ports:
- "8808:80"
volumes:
@@ -88,7 +71,7 @@ services:
https_proxy: ${https_proxy}
command: --model-id ${RERANK_MODEL_ID}
reranking:
image: intel/gen-ai-comps:reranking-tei-xeon-server
image: opea/gen-ai-comps:reranking-tei-xeon-server
container_name: reranking-tei-xeon-server
ports:
- "8000:8000"
@@ -101,15 +84,15 @@ services:
restart: unless-stopped
tgi_service:
image: ghcr.io/huggingface/text-generation-inference:1.4
container_name: tgi_service
container_name: tgi-service
ports:
- "8008:80"
- "9009:80"
volumes:
- "./data:/data"
shm_size: 1g
command: --model-id ${LLM_MODEL_ID}
llm:
image: intel/gen-ai-comps:llm-tgi-server
image: opea/gen-ai-comps:llm-tgi-server
container_name: llm-tgi-server
ports:
- "9000:9000"