Update all examples yaml files of GMC in GenAIExample (#436)

* Update all examples yaml files of GMC in GenAIExample.
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
Steve Zhang
2024-07-23 16:40:51 +08:00
committed by GitHub
parent 8ad7f36fe2
commit 290a74fae9
16 changed files with 313 additions and 9 deletions

View File

@@ -23,6 +23,7 @@ spec:
            serviceName: embedding-svc
            config:
              endpoint: /v1/embeddings
+             TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc
@@ -33,6 +34,8 @@ spec:
            serviceName: retriever-svc
            config:
              endpoint: /v1/retrieval
+             REDIS_URL: redis-vector-db
+             TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
+             TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
+             TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc
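The config entries added here (`TEI_EMBEDDING_ENDPOINT`, `REDIS_URL`, `TEI_RERANKING_ENDPOINT`, `TGI_LLM_ENDPOINT`) wire each microservice to its backing service by Kubernetes service name. A hedged sketch of how one might verify the wiring after deployment, assuming GMC surfaces each `config` key as a container environment variable on the corresponding workload; the deployment name and the `chatqa` namespace are assumptions, not values from this commit:

```bash
# Hypothetical check (deployment and namespace names are assumptions):
# confirm the config key was surfaced as an environment variable.
kubectl -n chatqa set env deployment/embedding-svc --list | grep TEI_EMBEDDING_ENDPOINT
```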

View File

@@ -0,0 +1,124 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: switch
  namespace: switch
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Embedding
          nodeName: node1
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          nodeName: node2
    node1:
      routerType: Switch
      steps:
        - name: Embedding
          condition: embedding-model-id==large
          internalService:
            serviceName: embedding-svc-large
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
        - name: Embedding
          condition: embedding-model-id==small
          internalService:
            serviceName: embedding-svc-small
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc-bge15
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc-bge-small
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: Retriever
          condition: embedding-model-id==large
          data: $response
          internalService:
            serviceName: retriever-svc-large
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-large
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
        - name: Retriever
          condition: embedding-model-id==small
          data: $response
          internalService:
            serviceName: retriever-svc-small
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-small
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-large
            isDownstreamService: true
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-small
            isDownstreamService: true
    node2:
      routerType: Switch
      steps:
        - name: Llm
          condition: model-id==intel
          internalService:
            serviceName: llm-svc-intel
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
        - name: Llm
          condition: model-id==llama
          internalService:
            serviceName: llm-svc-llama
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-service-intel
            config:
              endpoint: /generate
              MODEL_ID: Intel/neural-chat-7b-v3-3
            isDownstreamService: true
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-service-llama
            config:
              endpoint: /generate
              MODEL_ID: openlm-research/open_llama_3b
            isDownstreamService: true
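The two `Switch` nodes above pick a branch from the `condition` expressions (`embedding-model-id==large|small`, `model-id==intel|llama`). A hedged request sketch, assuming the router compares those condition keys against same-named fields in the request JSON; the URL, port, and the `"query"` field are placeholders, not values taken from this commit:

```bash
# Placeholder accessUrl; in practice read it from the GMC status,
# e.g. `kubectl get gmc -n switch -o jsonpath=...`
export accessUrl="http://router-service.switch.svc.cluster.local:8080"
# Route to the "small" embedding branch and the "intel" LLM branch (assumed field names)
curl $accessUrl -X POST -H 'Content-Type: application/json' \
  -d '{"query": "What is AI?", "embedding-model-id": "small", "model-id": "intel"}'
```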

View File

@@ -0,0 +1,124 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: switch
  namespace: switch
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Embedding
          nodeName: node1
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          nodeName: node2
    node1:
      routerType: Switch
      steps:
        - name: Embedding
          condition: embedding-model-id==large
          internalService:
            serviceName: embedding-svc-large
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
        - name: Embedding
          condition: embedding-model-id==small
          internalService:
            serviceName: embedding-svc-small
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc-bge15
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc-bge-small
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: Retriever
          condition: embedding-model-id==large
          data: $response
          internalService:
            serviceName: retriever-svc-large
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-large
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
        - name: Retriever
          condition: embedding-model-id==small
          data: $response
          internalService:
            serviceName: retriever-svc-small
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-small
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-large
            isDownstreamService: true
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-small
            isDownstreamService: true
    node2:
      routerType: Switch
      steps:
        - name: Llm
          condition: model-id==intel
          internalService:
            serviceName: llm-svc-intel
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-intel
        - name: Llm
          condition: model-id==llama
          internalService:
            serviceName: llm-svc-llama
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-llama
        - name: Tgi
          internalService:
            serviceName: tgi-service-intel
            config:
              endpoint: /generate
              MODEL_ID: Intel/neural-chat-7b-v3-3
            isDownstreamService: true
        - name: Tgi
          internalService:
            serviceName: tgi-service-llama
            config:
              endpoint: /generate
              MODEL_ID: bigscience/bloom-560m
            isDownstreamService: true

View File

@@ -23,6 +23,7 @@ spec:
            serviceName: embedding-svc
            config:
              endpoint: /v1/embeddings
+             TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc
@@ -33,6 +34,8 @@ spec:
            serviceName: retriever-svc
            config:
              endpoint: /v1/retrieval
+             REDIS_URL: redis-vector-db
+             TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
+             TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
+             TGI_LLM_ENDPOINT: tgi-service-m
        - name: Tgi
          internalService:
            serviceName: tgi-service-m

View File

@@ -64,7 +64,7 @@ function validate_chatqna() {
    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
-      [[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
+      [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
        status=true
    fi
    if [ $status == false ]; then

View File

@@ -65,7 +65,7 @@ function validate_chatqna() {
    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
-      [[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
+      [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
        status=true
    fi
    if [ $status == false ]; then
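Both ChatQnA test scripts now grep for `[DONE]` instead of the word "billion", which suggests the pipeline response is consumed as a stream whose final chunk carries a `[DONE]` marker rather than as a single completed answer. A hedged variant of the same check, with the brackets escaped so the marker is matched literally rather than as a character class:

```bash
# Count literal "[DONE]" markers in the captured response; a non-zero count
# indicates the stream ran to completion.
if [[ -f "$LOG_PATH/curl_chatqna.log" ]] && \
   [[ $(grep -c "\[DONE\]" "$LOG_PATH/curl_chatqna.log") != 0 ]]; then
    status=true
fi
```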

View File

@@ -0,0 +1,40 @@
<h1 align="center" id="title">Deploy CodeGen in a Kubernetes Cluster</h1>
This document outlines the deployment process for a Code Generation (CodeGen) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.

If you have not already done so, install GMC in your Kubernetes cluster by following the steps in the "Getting Started" section of the [GMC install guide](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying installation.

If you have only Intel Xeon machines, use the `codegen_xeon.yaml` file; if you have a Gaudi cluster, use `codegen_gaudi.yaml`. The example below illustrates deployment on Xeon.
## Deploy the CodeGen application
1. Create the desired namespace if it does not already exist and deploy the application
```bash
export APP_NAMESPACE=CT
kubectl create ns $APP_NAMESPACE
sed -i "s|namespace: codegen|namespace: $APP_NAMESPACE|g" ./codegen_xeon.yaml
kubectl apply -f ./codegen_xeon.yaml
```
2. Check if the application is up and ready
```bash
kubectl get pods -n $APP_NAMESPACE
```
3. Deploy a client pod for testing
```bash
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
```
4. Check that the client pod is ready
```bash
kubectl get pods -n $APP_NAMESPACE
```
5. Send a request to the application
```bash
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
```
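6. Inspect the response (optional)

The test script updated in this commit treats a `[DONE]` marker in the captured output as a sign that the response stream completed. A quick check along the same lines, assuming `LOG_PATH` was exported (for example `export LOG_PATH=.`) before running the request in step 5:

```bash
# A completed streaming response is expected to contain a [DONE] marker
grep -c "\[DONE\]" $LOG_PATH/gmc_codegen.log && echo "CodeGen response stream completed"
```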

View File

@@ -24,10 +24,11 @@ spec:
            serviceName: llm-service
            config:
              endpoint: /v1/chat/completions
+             TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc
            config:
-             LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+             MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
              endpoint: /generate
            isDownstreamService: true

View File

@@ -24,10 +24,11 @@ spec:
            serviceName: llm-service
            config:
              endpoint: /v1/chat/completions
+             TGI_LLM_ENDPOINT: tgi-service
        - name: Tgi
          internalService:
            serviceName: tgi-service
            config:
-             LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+             MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
              endpoint: /generate
            isDownstreamService: true

View File

@@ -44,7 +44,7 @@ function validate_codegen() {
    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
-      [[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
+      [[ $(grep -c "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
        status=true
    fi
    if [ $status == false ]; then

View File

@@ -44,7 +44,7 @@ function validate_codegen() {
    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
-      [[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
+      [[ $(grep -c "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
        status=true
    fi
    if [ $status == false ]; then

View File

@@ -24,10 +24,11 @@ spec:
            serviceName: codetrans-service
            config:
              endpoint: /v1/chat/completions
+             TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc
            config:
-             LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
+             MODEL_ID: HuggingFaceH4/mistral-7b-grok
              endpoint: /generate
            isDownstreamService: true

View File

@@ -24,10 +24,11 @@ spec:
            serviceName: codetrans-service
            config:
              endpoint: /v1/chat/completions
+             TGI_LLM_ENDPOINT: tgi-service
        - name: Tgi
          internalService:
            serviceName: tgi-service
            config:
-             LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
+             MODEL_ID: HuggingFaceH4/mistral-7b-grok
              endpoint: /generate
            isDownstreamService: true

View File

@@ -18,13 +18,14 @@ spec:
    root:
      routerType: Sequence
      steps:
-       - name: DocSumGaudi
+       - name: DocSum
          data: $response
          internalService:
            serviceName: docsum-llm-uservice
            config:
              endpoint: /v1/chat/docsum
              PORT: "9009"
+             TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc

View File

@@ -25,6 +25,7 @@ spec:
            config:
              endpoint: /v1/chat/docsum
              PORT: "9009"
+             TGI_LLM_ENDPOINT: tgi-svc
        - name: Tgi
          internalService:
            serviceName: tgi-svc