Update all examples yaml files of GMC in GenAIExample (#436)

* Update all examples yaml files of GMC in GenAIExample. Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
2024-07-23 16:40:51 +08:00
parent 8ad7f36fe2
commit 290a74fae9
16 changed files with 313 additions and 9 deletions
--- a/ChatQnA/kubernetes/manifests/README.md
+++ b/ChatQnA/kubernetes/manifests/README.md
--- a/ChatQnA/kubernetes/chatQnA_gaudi.yaml
+++ b/ChatQnA/kubernetes/chatQnA_gaudi.yaml
@@ -23,6 +23,7 @@ spec:
          serviceName: embedding-svc
          config:
            endpoint: /v1/embeddings
            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
      - name: TeiEmbeddingGaudi
        internalService:
          serviceName: tei-embedding-gaudi-svc
@@ -33,6 +34,8 @@ spec:
          serviceName: retriever-svc
          config:
            endpoint: /v1/retrieval
            REDIS_URL: redis-vector-db
            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
      - name: VectorDB
        internalService:
          serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
          serviceName: reranking-svc
          config:
            endpoint: /v1/reranking
            TEI_RERANKING_ENDPOINT: tei-reranking-svc
      - name: TeiReranking
        internalService:
          serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
          serviceName: llm-svc
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-gaudi-svc
      - name: TgiGaudi
        internalService:
          serviceName: tgi-gaudi-svc
--- a/ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
+++ b/ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
@@ -0,0 +1,124 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # GMConnector for a ChatQnA pipeline whose embedding/retrieval stage and LLM
 # stage are each chosen at request time by a Switch node (gaudi platform label).
 apiVersion: gmc.opea.io/v1alpha3
 kind: GMConnector
 metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: switch
  namespace: switch
 spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    # Entry node: a Sequence whose Embedding and Llm steps are delegated to the
    # Switch nodes `node1` and `node2` defined below.
    root:
      routerType: Sequence
      steps:
      - name: Embedding
        nodeName: node1
      - name: Reranking
        data: $response
        internalService:
          serviceName: reranking-svc
          config:
            endpoint: /v1/reranking
            TEI_RERANKING_ENDPOINT: tei-reranking-svc
      # NOTE(review): steps flagged `isDownstreamService: true` are presumably
      # backing services reached through another step's *_ENDPOINT/REDIS_URL
      # config rather than called by the router directly - confirm against GMC docs.
      - name: TeiReranking
        internalService:
          serviceName: tei-reranking-svc
          config:
            endpoint: /rerank
          isDownstreamService: true
      - name: Llm
        data: $response
        nodeName: node2
    # Embedding + retrieval stage. Steps with a `condition` are selected on
    # `embedding-model-id` (`large` or `small`); each variant has its own TEI
    # embedding backend and its own Redis vector DB.
    node1:
      routerType: Switch
      steps:
        - name: Embedding
          condition: embedding-model-id==large
          internalService:
            serviceName: embedding-svc-large
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
        - name: Embedding
          condition: embedding-model-id==small
          internalService:
            serviceName: embedding-svc-small
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc-bge15
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc-bge-small
            config:
              # The small variant must load a different model than the large
              # one; otherwise both switch branches serve identical embeddings.
              MODEL_ID: BAAI/bge-small-en-v1.5
            isDownstreamService: true
        - name: Retriever
          condition: embedding-model-id==large
          data: $response
          internalService:
            serviceName: retriever-svc-large
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-large
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
        - name: Retriever
          condition: embedding-model-id==small
          data: $response
          internalService:
            serviceName: retriever-svc-small
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-small
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-large
            isDownstreamService: true
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-small
            isDownstreamService: true
    # LLM stage. Steps with a `condition` are selected on `model-id`
    # (`intel` or `llama`); each variant points at its own TGI backend.
    node2:
      routerType: Switch
      steps:
        - name: Llm
          condition: model-id==intel
          internalService:
            serviceName: llm-svc-intel
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
        - name: Llm
          condition: model-id==llama
          internalService:
            serviceName: llm-svc-llama
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-service-intel
            config:
              endpoint: /generate
              MODEL_ID: Intel/neural-chat-7b-v3-3
            isDownstreamService: true
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-service-llama
            config:
              endpoint: /generate
              MODEL_ID: openlm-research/open_llama_3b
            isDownstreamService: true
--- a/ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
+++ b/ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
@@ -0,0 +1,124 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 apiVersion: gmc.opea.io/v1alpha3
 kind: GMConnector
 metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: switch
  namespace: switch
 spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
      - name: Embedding
        nodeName: node1
      - name: Reranking
        data: $response
        internalService:
          serviceName: reranking-svc
          config:
            endpoint: /v1/reranking
            TEI_RERANKING_ENDPOINT: tei-reranking-svc
      - name: TeiReranking
        internalService:
          serviceName: tei-reranking-svc
          config:
            endpoint: /rerank
          isDownstreamService: true
      - name: Llm
        data: $response
        nodeName: node2
    node1:
      routerType: Switch
      steps:
        - name: Embedding
          condition: embedding-model-id==large
          internalService:
            serviceName: embedding-svc-large
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
        - name: Embedding
          condition: embedding-model-id==small
          internalService:
            serviceName: embedding-svc-small
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc-bge15
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc-bge-small
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: Retriever
          condition: embedding-model-id==large
          data: $response
          internalService:
            serviceName: retriever-svc-large
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-large
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
        - name: Retriever
          condition: embedding-model-id==small
          data: $response
          internalService:
            serviceName: retriever-svc-small
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-small
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-large
            isDownstreamService: true
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-small
            isDownstreamService: true
    node2:
      routerType: Switch
      steps:
        - name: Llm
          condition: model-id==intel
          internalService:
            serviceName: llm-svc-intel
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-intel
        - name: Llm
          condition: model-id==llama
          internalService:
            serviceName: llm-svc-llama
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-llama
        - name: Tgi
          internalService:
            serviceName: tgi-service-intel
            config:
              endpoint: /generate
              MODEL_ID: Intel/neural-chat-7b-v3-3
            isDownstreamService: true
        - name: Tgi
          internalService:
            serviceName: tgi-service-llama
            config:
              endpoint: /generate
              MODEL_ID: bigscience/bloom-560m
            isDownstreamService: true
--- a/ChatQnA/kubernetes/chatQnA_xeon.yaml
+++ b/ChatQnA/kubernetes/chatQnA_xeon.yaml
@@ -23,6 +23,7 @@ spec:
          serviceName: embedding-svc
          config:
            endpoint: /v1/embeddings
            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
      - name: TeiEmbedding
        internalService:
          serviceName: tei-embedding-svc
@@ -33,6 +34,8 @@ spec:
          serviceName: retriever-svc
          config:
            endpoint: /v1/retrieval
            REDIS_URL: redis-vector-db
            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
      - name: VectorDB
        internalService:
          serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
          serviceName: reranking-svc
          config:
            endpoint: /v1/reranking
            TEI_RERANKING_ENDPOINT: tei-reranking-svc
      - name: TeiReranking
        internalService:
          serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
          serviceName: llm-svc
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-service-m
      - name: Tgi
        internalService:
          serviceName: tgi-service-m
--- a/ChatQnA/tests/test_gmc_on_gaudi.sh
+++ b/ChatQnA/tests/test_gmc_on_gaudi.sh
@@ -64,7 +64,7 @@ function validate_chatqna() {
   echo "Checking response results, make sure the output is reasonable. "
   local status=false
   # Pass only when the log exists and contains the literal string "[DONE]".
   # -F is required: as a regex, "[DONE]" is a bracket expression that matches
   # any line containing D, O, N or E, so the check would almost always pass.
   if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
-   [[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
+   [[ $(grep -cF "[DONE]" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
       status=true
   fi
   if [ $status == false ]; then
--- a/ChatQnA/tests/test_gmc_on_xeon.sh
+++ b/ChatQnA/tests/test_gmc_on_xeon.sh
@@ -65,7 +65,7 @@ function validate_chatqna() {
   echo "Checking response results, make sure the output is reasonable. "
   local status=false
   # Pass only when the log exists and contains the literal string "[DONE]".
   # -F is required: as a regex, "[DONE]" is a bracket expression that matches
   # any line containing D, O, N or E, so the check would almost always pass.
   if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
-   [[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
+   [[ $(grep -cF "[DONE]" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
       status=true
   fi
   if [ $status == false ]; then
--- a/CodeGen/kubernetes/README.md
+++ b/CodeGen/kubernetes/README.md
@@ -0,0 +1,40 @@
 <h1 align="center" id="title">Deploy CodeGen in a Kubernetes Cluster</h1>
 This document outlines the deployment process for a Code Generation (CodeGen) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
 Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
 If you have only Intel Xeon machines, use the `codegen_xeon.yaml` file; if you have a Gaudi cluster, use `codegen_gaudi.yaml`.
 The example below uses Xeon.
 ## Deploy the CodeGen application
 1. Create the desired namespace if it does not already exist and deploy the application
 ```bash
 # Namespace names must be lowercase RFC 1123 labels; an uppercase name is rejected by kubectl.
 export APP_NAMESPACE=ct
 kubectl create ns $APP_NAMESPACE
 sed -i "s|namespace: codegen|namespace: $APP_NAMESPACE|g"  ./codegen_xeon.yaml
 kubectl apply -f ./codegen_xeon.yaml
 ```
 2. Check if the application is up and ready
 ```bash
 kubectl get pods -n $APP_NAMESPACE
 ```
 3. Deploy a client pod for testing
 ```bash
 kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
 ```
 4. Check that client pod is ready
 ```bash
    kubectl get pods -n $APP_NAMESPACE
 ```
 5. Send request to application
 ```bash
 export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
 export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
 kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > "${LOG_PATH:-.}/gmc_codegen.log"
 ```
--- a/CodeGen/kubernetes/codegen_gaudi.yaml
+++ b/CodeGen/kubernetes/codegen_gaudi.yaml
@@ -24,10 +24,11 @@ spec:
          serviceName: llm-service
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-gaudi-svc
      - name: TgiGaudi
        internalService:
          serviceName: tgi-gaudi-svc
          config:
-            LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+            MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
            endpoint: /generate
          isDownstreamService: true
--- a/CodeGen/kubernetes/codegen_xeon.yaml
+++ b/CodeGen/kubernetes/codegen_xeon.yaml
@@ -24,10 +24,11 @@ spec:
          serviceName: llm-service
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-service
      - name: Tgi
        internalService:
          serviceName: tgi-service
          config:
-            LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+            MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
            endpoint: /generate
          isDownstreamService: true
--- a/CodeGen/tests/test_gmc_on_gaudi.sh
+++ b/CodeGen/tests/test_gmc_on_gaudi.sh
@@ -44,7 +44,7 @@ function validate_codegen() {
    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    # Pass only when the log exists and contains the literal string "[DONE]".
    # -F is required: as a regex, "[DONE]" is a bracket expression that matches
    # any line containing D, O, N or E, so the check would almost always pass.
    if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
-    [[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
+    [[ $(grep -cF "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
        status=true
    fi
    if [ $status == false ]; then
--- a/CodeGen/tests/test_gmc_on_xeon.sh
+++ b/CodeGen/tests/test_gmc_on_xeon.sh
@@ -44,7 +44,7 @@ function validate_codegen() {
    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    # Pass only when the log exists and contains the literal string "[DONE]".
    # -F is required: as a regex, "[DONE]" is a bracket expression that matches
    # any line containing D, O, N or E, so the check would almost always pass.
    if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
-    [[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
+    [[ $(grep -cF "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
        status=true
    fi
    if [ $status == false ]; then
--- a/CodeTrans/kubernetes/codetrans_gaudi.yaml
+++ b/CodeTrans/kubernetes/codetrans_gaudi.yaml
@@ -24,10 +24,11 @@ spec:
          serviceName: codetrans-service
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-gaudi-svc
      - name: TgiGaudi
        internalService:
          serviceName: tgi-gaudi-svc
          config:
-            LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
+            MODEL_ID: HuggingFaceH4/mistral-7b-grok
            endpoint: /generate
          isDownstreamService: true
--- a/CodeTrans/kubernetes/codetrans_xeon.yaml
+++ b/CodeTrans/kubernetes/codetrans_xeon.yaml
@@ -24,10 +24,11 @@ spec:
          serviceName: codetrans-service
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-service
      - name: Tgi
        internalService:
          serviceName: tgi-service
          config:
-            LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
+            MODEL_ID: HuggingFaceH4/mistral-7b-grok
            endpoint: /generate
          isDownstreamService: true
--- a/DocSum/kubernetes/docsum_gaudi.yaml
+++ b/DocSum/kubernetes/docsum_gaudi.yaml
@@ -18,13 +18,14 @@ spec:
    root:
      routerType: Sequence
      steps:
-      - name: DocSumGaudi
+      - name: DocSum
        data: $response
        internalService:
          serviceName: docsum-llm-uservice
          config:
            endpoint: /v1/chat/docsum
            PORT: "9009"
            TGI_LLM_ENDPOINT: tgi-gaudi-svc
      - name: TgiGaudi
        internalService:
          serviceName: tgi-gaudi-svc
--- a/DocSum/kubernetes/docsum_xeon.yaml
+++ b/DocSum/kubernetes/docsum_xeon.yaml
@@ -25,6 +25,7 @@ spec:
          config:
            endpoint: /v1/chat/docsum
            PORT: "9009"
            TGI_LLM_ENDPOINT: tgi-svc
      - name: Tgi
        internalService:
          serviceName: tgi-svc