Add new example of SearchQnA for GenAIExample (#448)
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
This commit is contained in:
40
SearchQnA/kubernetes/README.md
Normal file
40
SearchQnA/kubernetes/README.md
Normal file
@@ -0,0 +1,40 @@
|
||||
<h1 align="center" id="title">Deploy SearchQnA in a Kubernetes Cluster</h1>
|
||||
|
||||
This document outlines the deployment process for a Search Question-and-Answering (SearchQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
|
||||
|
||||
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
|
||||
|
||||
If you have only Intel Xeon machines, use the `searchQnA_xeon.yaml` file; if you have a Gaudi cluster, use `searchQnA_gaudi.yaml`.
|
||||
The example below illustrates the deployment on Xeon.
|
||||
|
||||
## Deploy the RAG application
|
||||
|
||||
1. Create the desired namespace if it does not already exist and deploy the application
|
||||
```bash
|
||||
export APP_NAMESPACE=ct  # namespace names must be lowercase (RFC 1123 label)
|
||||
kubectl create ns $APP_NAMESPACE
|
||||
sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_xeon.yaml
|
||||
kubectl apply -f ./searchQnA_xeon.yaml
|
||||
```
|
||||
|
||||
2. Check if the application is up and ready
|
||||
```bash
|
||||
kubectl get pods -n $APP_NAMESPACE
|
||||
```
|
||||
|
||||
3. Deploy a client pod for testing
|
||||
```bash
|
||||
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
|
||||
```
|
||||
|
||||
4. Check that the client pod is ready
|
||||
```bash
|
||||
kubectl get pods -n $APP_NAMESPACE
|
||||
```
|
||||
|
||||
5. Send a request to the application
|
||||
```bash
|
||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}")
|
||||
kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$accessUrl" -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > "${LOG_PATH:-.}/gmc_searchqa.log"
|
||||
```
|
||||
65
SearchQnA/kubernetes/searchQnA_gaudi.yaml
Normal file
65
SearchQnA/kubernetes/searchQnA_gaudi.yaml
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: gmc.opea.io/v1alpha3
|
||||
kind: GMConnector
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: gmconnector
|
||||
app.kubernetes.io/managed-by: kustomize
|
||||
gmc/platform: gaudi
|
||||
name: searchqa
|
||||
namespace: searchqa
|
||||
spec:
|
||||
routerConfig:
|
||||
name: router
|
||||
serviceName: router-service
|
||||
nodes:
|
||||
root:
|
||||
routerType: Sequence
|
||||
steps:
|
||||
- name: Embedding
|
||||
internalService:
|
||||
serviceName: embedding-svc
|
||||
config:
|
||||
endpoint: /v1/embeddings
|
||||
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
|
||||
- name: TeiEmbeddingGaudi
|
||||
internalService:
|
||||
serviceName: tei-embedding-gaudi-svc
|
||||
isDownstreamService: true
|
||||
- name: WebRetriever
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: web-retriever-svc
|
||||
config:
|
||||
endpoint: /v1/web_retrieval
|
||||
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
|
||||
GOOGLE_API_KEY: "insert-your-google-api-key-here"
|
||||
GOOGLE_CSE_ID: "insert-your-google-cse-id-here"
|
||||
- name: Reranking
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: reranking-svc
|
||||
config:
|
||||
endpoint: /v1/reranking
|
||||
TEI_RERANKING_ENDPOINT: tei-reranking-svc
|
||||
- name: TeiReranking
|
||||
internalService:
|
||||
serviceName: tei-reranking-svc
|
||||
config:
|
||||
endpoint: /rerank
|
||||
isDownstreamService: true
|
||||
- name: Llm
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: llm-svc
|
||||
config:
|
||||
endpoint: /v1/chat/completions
|
||||
TGI_LLM_ENDPOINT: tgi-gaudi-svc
|
||||
- name: TgiGaudi
|
||||
internalService:
|
||||
serviceName: tgi-gaudi-svc
|
||||
config:
|
||||
endpoint: /generate
|
||||
isDownstreamService: true
|
||||
65
SearchQnA/kubernetes/searchQnA_xeon.yaml
Normal file
65
SearchQnA/kubernetes/searchQnA_xeon.yaml
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: gmc.opea.io/v1alpha3
|
||||
kind: GMConnector
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: gmconnector
|
||||
app.kubernetes.io/managed-by: kustomize
|
||||
gmc/platform: xeon
|
||||
name: searchqa
|
||||
namespace: searchqa
|
||||
spec:
|
||||
routerConfig:
|
||||
name: router
|
||||
serviceName: router-service
|
||||
nodes:
|
||||
root:
|
||||
routerType: Sequence
|
||||
steps:
|
||||
- name: Embedding
|
||||
internalService:
|
||||
serviceName: embedding-svc
|
||||
config:
|
||||
endpoint: /v1/embeddings
|
||||
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
|
||||
- name: TeiEmbedding
|
||||
internalService:
|
||||
serviceName: tei-embedding-svc
|
||||
isDownstreamService: true
|
||||
- name: WebRetriever
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: web-retriever-svc
|
||||
config:
|
||||
endpoint: /v1/web_retrieval
|
||||
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
|
||||
GOOGLE_API_KEY: "insert-your-google-api-key-here"
|
||||
GOOGLE_CSE_ID: "insert-your-google-cse-id-here"
|
||||
- name: Reranking
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: reranking-svc
|
||||
config:
|
||||
endpoint: /v1/reranking
|
||||
TEI_RERANKING_ENDPOINT: tei-reranking-svc
|
||||
- name: TeiReranking
|
||||
internalService:
|
||||
serviceName: tei-reranking-svc
|
||||
config:
|
||||
endpoint: /rerank
|
||||
isDownstreamService: true
|
||||
- name: Llm
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: llm-svc
|
||||
config:
|
||||
endpoint: /v1/chat/completions
|
||||
TGI_LLM_ENDPOINT: tgi-service-m
|
||||
- name: Tgi
|
||||
internalService:
|
||||
serviceName: tgi-service-m
|
||||
config:
|
||||
endpoint: /generate
|
||||
isDownstreamService: true
|
||||
Reference in New Issue
Block a user