Fix ChatQnA microservice issues on Xeon (#125)
Signed-off-by: letonghan <letong.han@intel.com>
@@ -24,7 +24,7 @@ pip install .
### 1. Build Embedding Image

```bash
-docker build -t opea/gen-ai-comps:embedding-tei-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/docker/Dockerfile .
+docker build -t opea/gen-ai-comps:embedding-tei-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```

### 2. Build Retriever Image
@@ -45,19 +45,12 @@ docker build -t opea/gen-ai-comps:reranking-tei-xeon-server --build-arg https_pr
docker build -t opea/gen-ai-comps:llm-tgi-server --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/langchain/docker/Dockerfile .
```

-### 5. Pull qna-rag-redis-server Image
-
-```bash
-docker pull intel/gen-ai-examples:qna-rag-redis-server
-```

Then run the command `docker images`; you will have the following four Docker Images:

1. `opea/gen-ai-comps:embedding-tei-server`
2. `opea/gen-ai-comps:retriever-redis-server`
3. `opea/gen-ai-comps:reranking-tei-xeon-server`
4. `opea/gen-ai-comps:llm-tgi-server`
-5. `intel/gen-ai-examples:qna-rag-redis-server`
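
A quick way to confirm that the four images are present is to filter the local image list, for example:

```bash
# List only the ChatQnA component images built above
docker images | grep opea/gen-ai-comps
```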

## 🚀 Start Microservices

@@ -70,17 +63,15 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-large"
-export LLM_MODEL_ID="m-a-p/OpenCodeInterpreter-DS-6.7B"
-export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:8090"
-export TEI_RERANKING_ENDPOINT="http://${your_ip}:6060"
-export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
+export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808"
+export TGI_LLM_ENDPOINT="http://${your_ip}:9009"
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```

Note: Please replace `your_ip` with your external IP address; do not use localhost.
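
If you are unsure which address to use, one way to look it up is sketched below (a minimal sketch, assuming a Linux host where `hostname -I` is available); setting the `your_ip` shell variable this way also lets the `${your_ip}` references below expand directly:

```bash
# Print the host's first routable IP address and keep it in the variable used by the commands below
export your_ip=$(hostname -I | awk '{print $1}')
echo ${your_ip}
```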

### Start Microservice Docker Containers

```bash
@@ -92,7 +83,7 @@ docker compose -f docker_compose.yaml up -d
1. TEI Embedding Service

```bash
-curl ${your_ip}:8090/embed \
+curl ${your_ip}:6006/embed \
  -X POST \
  -d '{"inputs":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'
@@ -108,18 +99,27 @@ curl http://${your_ip}:6000/v1/embeddings\
```

3. Retriever Microservice

+To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:
+
+```Python
+import random
+embedding = [random.uniform(-1, 1) for _ in range(768)]
+print(embedding)
+```
+
+Then substitute your mock embedding vector for `${your_embedding}` in the following cURL command:

```bash
curl http://${your_ip}:7000/v1/retrieval\
  -X POST \
-  -d '{"text":"test","embedding":[1,1,...1]}' \
+  -d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}}' \
  -H 'Content-Type: application/json'
```
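
If you prefer to generate the mock vector and issue the request from the same shell, a minimal sketch (assuming `python3` is available on the host) is:

```bash
# Build a 768-dimensional mock embedding as a JSON array and keep it in a shell variable,
# then substitute it for ${your_embedding} in the retrieval request above.
your_embedding=$(python3 -c "import random, json; print(json.dumps([random.uniform(-1, 1) for _ in range(768)]))")
echo "${your_embedding}" | head -c 80   # quick sanity check of the generated vector
```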

4. TEI Reranking Service

```bash
-curl http://${your_ip}:6060/rerank \
+curl http://${your_ip}:8808/rerank \
  -X POST \
  -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
  -H 'Content-Type: application/json'
@@ -137,7 +137,7 @@ curl http://${your_ip}:8000/v1/reranking\
6. TGI Service

```bash
-curl http://${your_ip}:8008/generate \
+curl http://${your_ip}:9009/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'
@@ -152,6 +152,8 @@ curl http://${your_ip}:9000/v1/chat/completions\
  -H 'Content-Type: application/json'
```

+Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service.

## 🚀 Construct Mega Service

Modify the `initial_inputs` on line 34 of `chatqna.py`, then run it to get the ChatQnA result from this mega service.
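
A minimal way to run it after editing (assuming `chatqna.py` is in the current directory and its Python dependencies are installed) is:

```bash
# Run the mega-service orchestrator; it prints the result dictionary once the flow completes
python chatqna.py
```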

@@ -31,7 +31,7 @@ class MyServiceOrchestrator:
        self.service_builder.flow_to(rerank, llm)

    def schedule(self):
-        self.service_builder.schedule(initial_inputs={"text": "What is the revenue of Nike?"})
+        self.service_builder.schedule(initial_inputs={"text": "What is the revenue of Nike in 2023?"})
        self.service_builder.get_all_final_outputs()
        result_dict = self.service_builder.result_dict
        print(result_dict)

@@ -21,30 +21,11 @@ services:
    ports:
      - "6379:6379"
      - "8001:8001"
-  qna-rag-redis-server:
-    image: intel/gen-ai-examples:qna-rag-redis-server
-    container_name: qna-rag-redis-server
-    environment:
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      REDIS_PORT: 6379
-      EMBED_MODEL: BAAI/bge-base-en-v1.5
-      REDIS_SCHEMA: schema_dim_768.yml
-      VECTOR_DATABASE: REDIS
-    ulimits:
-      memlock:
-        soft: -1 # Set memlock to unlimited (no soft or hard limit)
-        hard: -1
-    volumes:
-      - ../redis:/ws
-      - ../test:/test
-    network_mode: "host"
  tei_embedding_service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-    container_name: tei_embedding_server
+    container_name: tei-embedding-server
    ports:
-      - "8090:80"
+      - "6006:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
@@ -53,18 +34,19 @@ services:
      https_proxy: ${https_proxy}
    command: --model-id ${EMBEDDING_MODEL_ID}
  embedding:
-    image: intel/gen-ai-comps:embedding-tei-server
+    image: opea/gen-ai-comps:embedding-tei-server
    container_name: embedding-tei-server
    ports:
      - "6000:6000"
    ipc: host
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  retriever:
-    image: intel/gen-ai-comps:retriever-redis-server
+    image: opea/gen-ai-comps:retriever-redis-server
    container_name: retriever-redis-server
    ports:
      - "7000:7000"
@@ -74,10 +56,11 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei_xeon_service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-    container_name: tei_xeon_server
+    container_name: tei-xeon-server
    ports:
      - "8808:80"
    volumes:
@@ -88,7 +71,7 @@ services:
      https_proxy: ${https_proxy}
    command: --model-id ${RERANK_MODEL_ID}
  reranking:
-    image: intel/gen-ai-comps:reranking-tei-xeon-server
+    image: opea/gen-ai-comps:reranking-tei-xeon-server
    container_name: reranking-tei-xeon-server
    ports:
      - "8000:8000"
@@ -101,15 +84,15 @@ services:
    restart: unless-stopped
  tgi_service:
    image: ghcr.io/huggingface/text-generation-inference:1.4
-    container_name: tgi_service
+    container_name: tgi-service
    ports:
-      - "8008:80"
+      - "9009:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    command: --model-id ${LLM_MODEL_ID}
  llm:
-    image: intel/gen-ai-comps:llm-tgi-server
+    image: opea/gen-ai-comps:llm-tgi-server
    container_name: llm-tgi-server
    ports:
      - "9000:9000"