Add new example of SearchQnA for GenAIExample (#448)

Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
This commit is contained in:
Steve Zhang
2024-07-24 15:59:52 +08:00
committed by GitHub
parent e371b1e9d4
commit 21b7d11098
5 changed files with 428 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
<h1 align="center" id="title">Deploy SearchQnA in a Kubernetes Cluster</h1>
This document outlines the deployment process for a Search Question-and-Answer (SearchQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
If you have only Intel Xeon machines, use the `searchQnA_xeon.yaml` file; if you have a Gaudi cluster, use `searchQnA_gaudi.yaml`.
The example below uses Xeon.
## Deploy the SearchQnA application
1. Create the desired namespace if it does not already exist and deploy the application
```bash
# Namespace names must be valid RFC 1123 labels: lowercase alphanumerics and '-'.
export APP_NAMESPACE=ct
# Create the namespace only when it is missing, so the step can be re-run safely.
kubectl get ns "$APP_NAMESPACE" >/dev/null 2>&1 || kubectl create ns "$APP_NAMESPACE"
# Point the manifest at the chosen namespace (edits the file in place), then deploy it.
sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_xeon.yaml
kubectl apply -f ./searchQnA_xeon.yaml
```
2. Check if the application is up and ready
```bash
kubectl get pods -n $APP_NAMESPACE
```
3. Deploy a client pod for testing
```bash
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
```
4. Check that the client pod is ready
```bash
kubectl get pods -n $APP_NAMESPACE
```
5. Send request to application
```bash
# Look up the client pod by the app=client-test label.
export CLIENT_POD=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath='{.items..metadata.name}')
# Read the access URL from the status of the GMConnector named "searchqa".
export accessUrl=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}")
# LOG_PATH is optional: when it is unset the response is saved in the current directory.
kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$accessUrl" -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > "${LOG_PATH:-.}/gmc_searchqa.log"
```

View File

@@ -0,0 +1,65 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# GMConnector resource describing the SearchQnA pipeline for the Gaudi platform.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: searchqa
  # The README's sed command rewrites this exact "namespace: searchqa" text.
  namespace: searchqa
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      # Steps are listed in pipeline order: embedding, web retrieval,
      # reranking, then the LLM.
      routerType: Sequence
      steps:
        - name: Embedding
          internalService:
            serviceName: embedding-svc
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
        # NOTE(review): isDownstreamService presumably marks a backend that the
        # microservices reach through their *_ENDPOINT settings rather than a
        # routed step of its own; confirm against the GMC documentation.
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc
            isDownstreamService: true
        - name: WebRetriever
          data: $response
          internalService:
            serviceName: web-retriever-svc
            config:
              endpoint: /v1/web_retrieval
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
              # Placeholders: replace both values with real credentials
              # before deploying.
              GOOGLE_API_KEY: "insert-your-google-api-key-here"
              GOOGLE_CSE_ID: "insert-your-google-cse-id-here"
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc
            config:
              endpoint: /generate
            isDownstreamService: true

View File

@@ -0,0 +1,65 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# GMConnector resource describing the SearchQnA pipeline for the Xeon platform.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: searchqa
  # The README's sed command rewrites this exact "namespace: searchqa" text.
  namespace: searchqa
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      # Steps are listed in pipeline order: embedding, web retrieval,
      # reranking, then the LLM.
      routerType: Sequence
      steps:
        - name: Embedding
          internalService:
            serviceName: embedding-svc
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
        # NOTE(review): isDownstreamService presumably marks a backend that the
        # microservices reach through their *_ENDPOINT settings rather than a
        # routed step of its own; confirm against the GMC documentation.
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc
            isDownstreamService: true
        - name: WebRetriever
          data: $response
          internalService:
            serviceName: web-retriever-svc
            config:
              endpoint: /v1/web_retrieval
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
              # Placeholders: replace both values with real credentials
              # before deploying.
              GOOGLE_API_KEY: "insert-your-google-api-key-here"
              GOOGLE_CSE_ID: "insert-your-google-cse-id-here"
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-m
        - name: Tgi
          internalService:
            serviceName: tgi-service-m
            config:
              endpoint: /generate
            isDownstreamService: true