Fix ChatQnA microservice issues on Xeon (#125)

Signed-off-by: letonghan <letong.han@intel.com>
Letong Han
2024-05-13 15:38:58 +08:00
committed by GitHub
parent 355d0b64de
commit 4885e6d8c7
3 changed files with 32 additions and 47 deletions


@@ -24,7 +24,7 @@ pip install .
### 1. Build Embedding Image
```bash
docker build -t opea/gen-ai-comps:embedding-tei-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/docker/Dockerfile .
docker build -t opea/gen-ai-comps:embedding-tei-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```
### 2. Build Retriever Image
@@ -45,19 +45,12 @@ docker build -t opea/gen-ai-comps:reranking-tei-xeon-server --build-arg https_pr
docker build -t opea/gen-ai-comps:llm-tgi-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/langchain/docker/Dockerfile .
```
### 5. Pull qna-rag-redis-server Image
```bash
docker pull intel/gen-ai-examples:qna-rag-redis-server
```
Then run the command `docker images`; you should see the following four Docker images (a quick check is sketched after the list):
1. `opea/gen-ai-comps:embedding-tei-server`
2. `opea/gen-ai-comps:retriever-redis-server`
3. `opea/gen-ai-comps:reranking-tei-xeon-server`
4. `opea/gen-ai-comps:llm-tgi-server`
5. `intel/gen-ai-examples:qna-rag-redis-server`
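As a quick sanity check, the built images can be listed by repository name; a minimal sketch, assuming the default image names above, is:
```bash
# Lists the opea/gen-ai-comps images built in the steps above
docker images opea/gen-ai-comps
```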
## 🚀 Start Microservices
@@ -70,17 +63,15 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-large"
export LLM_MODEL_ID="m-a-p/OpenCodeInterpreter-DS-6.7B"
export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${your_ip}:6060"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808"
export TGI_LLM_ENDPOINT="http://${your_ip}:9009"
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```
Note: Please replace `your_ip` with your external IP address; do not use localhost.
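If you are unsure of the host's external IP, one convenience sketch for capturing it before running the exports above (an assumption: a Linux host where `hostname -I` is available) is:
```bash
# Pick the first non-loopback IPv4 address reported by hostname -I (Linux only)
export your_ip=$(hostname -I | awk '{print $1}')
echo "Using host IP: ${your_ip}"
```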
### Start Microservice Docker Containers
```bash
@@ -92,7 +83,7 @@ docker compose -f docker_compose.yaml up -d
1. TEI Embedding Service
```bash
curl ${your_ip}:8090/embed \
curl ${your_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
@@ -108,18 +99,27 @@ curl http://${your_ip}:6000/v1/embeddings\
```
3. Retriever Microservice
To validate the retriever microservice, first generate a mock embedding vector of length 768 with a short Python script:
```Python
import random
embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```
Then substitute your mock embedding vector for `${your_embedding}` in the following cURL command:
```bash
curl http://${your_ip}:7000/v1/retrieval\
-X POST \
-d '{"text":"test","embedding":[1,1,...1]}' \
-d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}' \
-H 'Content-Type: application/json'
```
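As an alternative to pasting the vector by hand, a minimal sketch that generates the mock embedding and sends the request in one step (assuming `python3` is available on the host) is:
```bash
# Generate a 768-dimensional mock embedding and capture it in a shell variable
your_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")

# Send it to the retriever microservice; double quotes let ${your_embedding} expand
curl http://${your_ip}:7000/v1/retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```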
4. TEI Reranking Service
```bash
curl http://${your_ip}:6060/rerank \
curl http://${your_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
@@ -137,7 +137,7 @@ curl http://${your_ip}:8000/v1/reranking\
6. TGI Service
```bash
curl http://${your_ip}:8008/generate \
curl http://${your_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
@@ -152,6 +152,8 @@ curl http://${your_ip}:9000/v1/chat/completions\
-H 'Content-Type: application/json'
```
Once all of the microservices above have been validated, we are ready to construct the mega-service.
## 🚀 Construct Mega Service
Modify the `initial_inputs` on line 34 of `chatqna.py`, then run the script to get the ChatQnA result from this mega-service (a minimal launch sketch is shown below).
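A minimal launch sketch, assuming the mega-service is started directly with `python` and the environment variables exported earlier are still set:
```bash
# Run the mega-service orchestrator; it schedules the pipeline with the modified initial_inputs
python chatqna.py
```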


@@ -31,7 +31,7 @@ class MyServiceOrchestrator:
self.service_builder.flow_to(rerank, llm)
def schedule(self):
self.service_builder.schedule(initial_inputs={"text": "What is the revenue of Nike?"})
self.service_builder.schedule(initial_inputs={"text": "What is the revenue of Nike in 2023?"})
self.service_builder.get_all_final_outputs()
result_dict = self.service_builder.result_dict
print(result_dict)


@@ -21,30 +21,11 @@ services:
ports:
- "6379:6379"
- "8001:8001"
qna-rag-redis-server:
image: intel/gen-ai-examples:qna-rag-redis-server
container_name: qna-rag-redis-server
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
REDIS_PORT: 6379
EMBED_MODEL: BAAI/bge-base-en-v1.5
REDIS_SCHEMA: schema_dim_768.yml
VECTOR_DATABASE: REDIS
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)
hard: -1
volumes:
- ../redis:/ws
- ../test:/test
network_mode: "host"
tei_embedding_service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei_embedding_server
container_name: tei-embedding-server
ports:
- "8090:80"
- "6006:80"
volumes:
- "./data:/data"
shm_size: 1g
@@ -53,18 +34,19 @@ services:
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: intel/gen-ai-comps:embedding-tei-server
image: opea/gen-ai-comps:embedding-tei-server
container_name: embedding-tei-server
ports:
- "6000:6000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: intel/gen-ai-comps:retriever-redis-server
image: opea/gen-ai-comps:retriever-redis-server
container_name: retriever-redis-server
ports:
- "7000:7000"
@@ -74,10 +56,11 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei_xeon_service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei_xeon_server
container_name: tei-xeon-server
ports:
- "8808:80"
volumes:
@@ -88,7 +71,7 @@ services:
https_proxy: ${https_proxy}
command: --model-id ${RERANK_MODEL_ID}
reranking:
image: intel/gen-ai-comps:reranking-tei-xeon-server
image: opea/gen-ai-comps:reranking-tei-xeon-server
container_name: reranking-tei-xeon-server
ports:
- "8000:8000"
@@ -101,15 +84,15 @@ services:
restart: unless-stopped
tgi_service:
image: ghcr.io/huggingface/text-generation-inference:1.4
container_name: tgi_service
container_name: tgi-service
ports:
- "8008:80"
- "9009:80"
volumes:
- "./data:/data"
shm_size: 1g
command: --model-id ${LLM_MODEL_ID}
llm:
image: intel/gen-ai-comps:llm-tgi-server
image: opea/gen-ai-comps:llm-tgi-server
container_name: llm-tgi-server
ports:
- "9000:9000"