Add helm deployment instructions for GenAIExamples (#1373)

Add Helm deployment instructions for ChatQnA, AgentQnA, AudioQnA, CodeTrans, DocSum, FaqGen and VisualQnA. Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2025-01-10 09:55:31 +08:00
parent 99120f4cd2
commit c795ef2203
104 changed files with 828 additions and 14982 deletions
--- a/AudioQnA/README.md
+++ b/AudioQnA/README.md
@@ -71,6 +71,10 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instr

 Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AudioQnA on Xeon.

+## Deploy using Helm Chart
+
+Refer to the [AudioQnA Helm chart](./kubernetes/helm/README.md) for instructions on deploying AudioQnA on Kubernetes.
+
 ## Supported Models

 ### ASR
--- a/AudioQnA/kubernetes/intel/README_gmc.md
+++ b/AudioQnA/kubernetes/intel/README_gmc.md
--- a/AudioQnA/kubernetes/intel/hpu/gaudi/gmc/audioQnA_gaudi.yaml
+++ b/AudioQnA/kubernetes/intel/hpu/gaudi/gmc/audioQnA_gaudi.yaml
--- a/AudioQnA/kubernetes/intel/cpu/xeon/gmc/audioQnA_xeon.yaml
+++ b/AudioQnA/kubernetes/intel/cpu/xeon/gmc/audioQnA_xeon.yaml
--- a/AudioQnA/kubernetes/helm/README.md
+++ b/AudioQnA/kubernetes/helm/README.md
@@ -0,0 +1,18 @@
+# Deploy AudioQnA on Kubernetes cluster
+
+- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
+- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
+
+## Deploy on Xeon
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install audioqna oci://ghcr.io/opea-project/charts/audioqna  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
+```
+
+## Deploy on Gaudi
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install audioqna oci://ghcr.io/opea-project/charts/audioqna  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
+```
--- a/AudioQnA/kubernetes/helm/cpu-values.yaml
+++ b/AudioQnA/kubernetes/helm/cpu-values.yaml
@@ -0,0 +1,5 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:  # Xeon (CPU) overrides for the tgi component; passed via `-f cpu-values.yaml`
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # presumably a Hugging Face Hub model ID - confirm against the chart
--- a/AudioQnA/kubernetes/helm/gaudi-values.yaml
+++ b/AudioQnA/kubernetes/helm/gaudi-values.yaml
@@ -0,0 +1,43 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:  # Gaudi overrides for the tgi component; passed via `-f gaudi-values.yaml`
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.0.6"  # quoted so the version stays a string
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one Gaudi device for this component
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  CUDA_GRAPHS: ""  # explicit empty string (not null)
+  HF_HUB_DISABLE_PROGRESS_BARS: 1  # NOTE(review): bare int, unlike the quoted MAX_* values - confirm the chart stringifies it
+  HF_HUB_ENABLE_HF_TRANSFER: 0  # NOTE(review): bare int - same caveat as above
+  ENABLE_HPU_GRAPH: true  # NOTE(review): YAML boolean, not the string "true" - confirm the chart quotes it
+  LIMIT_HPU_GRAPH: true
+  USE_FLASH_ATTENTION: true
+  FLASH_ATTENTION_RECOMPUTE: true
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 120  # 120 x periodSeconds 5 = 600s; presumably the startup budget - confirm the chart passes this to the probe spec
+
+whisper:  # Gaudi override for the whisper component
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one Gaudi device
+
+speecht5:  # Gaudi override for the speecht5 component
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one Gaudi device
--- a/AudioQnA/kubernetes/intel/README.md
+++ b/AudioQnA/kubernetes/intel/README.md
@@ -1,32 +0,0 @@
-# Deploy AudioQnA in a Kubernetes Cluster
-
-> [NOTE]
-> The following values must be set before you can deploy:
-> HUGGINGFACEHUB_API_TOKEN
-> You can also customize the "MODEL_ID" and "model-volume"
-
-## Deploy On Xeon
-```
-cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
-export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
-kubectl apply -f audioqna.yaml
-```
-## Deploy On Gaudi
-```
-cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
-export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
-kubectl apply -f audioqna.yaml
-```
-
-
-## Verify Services
-
-Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary.
-
-```bash
-kubectl get pods
-
-curl http://${host_ip}:3008/v1/audioqna   -X POST   -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}'   -H 'Content-Type: application/json'
-```
--- a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
+++ b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
@@ -1,241 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: audio-qna-config
-  namespace: default
-data:
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
-  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
-
-  WHISPER_SERVER_HOST_IP: whisper-svc
-  WHISPER_SERVER_PORT: 7066
-  SPEECHT5_SERVER_HOST_IP: speecht5-svc
-  SPEECHT5_SERVER_PORT: 7055
-  LLM_SERVER_HOST_IP: llm-svc
-  LLM_SERVER_PORT: 3006
-
---
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: whisper-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: whisper-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: whisper-deploy
-    spec:
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: whisper-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: opea/whisper:latest
-        imagePullPolicy: IfNotPresent
-        name: whisper-deploy
-        args: null
-        ports:
-        - containerPort: 7066
-      serviceAccountName: default
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: whisper-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: whisper-deploy
-  ports:
-  - name: service
-    port: 7066
-    targetPort: 7066
-
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: speecht5-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: speecht5-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: speecht5-deploy
-    spec:
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: speecht5-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: opea/speecht5:latest
-        imagePullPolicy: IfNotPresent
-        name: speecht5-deploy
-        args: null
-        ports:
-        - containerPort: 7055
-      serviceAccountName: default
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: speecht5-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: speecht5-deploy
-  ports:
-  - name: service
-    port: 7055
-    targetPort: 7055
-
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /home/sdp/cesg
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 3006
-    targetPort: 80
-
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: audioqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: audioqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: audioqna-backend-server-deploy
-    spec:
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: audioqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: opea/audioqna:latest
-        imagePullPolicy: IfNotPresent
-        name: audioqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: audioqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: audioqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 3008
-    targetPort: 8888
-    nodePort: 30666
--- a/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml
+++ b/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml
@@ -1,293 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: audio-qna-config
-  namespace: default
-data:
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
-  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
-
-  WHISPER_SERVER_HOST_IP: whisper-svc
-  WHISPER_SERVER_PORT: 7066
-  SPEECHT5_SERVER_HOST_IP: speecht5-svc
-  SPEECHT5_SERVER_PORT: 7055
-  LLM_SERVER_HOST_IP: llm-svc
-  LLM_SERVER_PORT: 3006
-
---
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: whisper-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: whisper-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: whisper-deploy
-    spec:
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: whisper-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: opea/whisper-gaudi:latest
-        imagePullPolicy: IfNotPresent
-        name: whisper-deploy
-        args: null
-        ports:
-        - containerPort: 7066
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-      serviceAccountName: default
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: whisper-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: whisper-deploy
-  ports:
-  - name: service
-    port: 7066
-    targetPort: 7066
-
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: speecht5-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: speecht5-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: speecht5-deploy
-    spec:
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: speecht5-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: opea/speecht5-gaudi:latest
-        imagePullPolicy: IfNotPresent
-        name: speecht5-deploy
-        args: null
-        ports:
-        - containerPort: 7055
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-      serviceAccountName: default
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: speecht5-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: speecht5-deploy
-  ports:
-  - name: service
-    port: 7055
-    targetPort: 7055
-
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: ENABLE_HPU_GRAPH
-          value: 'true'
-        - name: LIMIT_HPU_GRAPH
-          value: 'true'
-        - name: USE_FLASH_ATTENTION
-          value: 'true'
-        - name: FLASH_ATTENTION_RECOMPUTE
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: PREFILL_BATCH_BUCKET_SIZE
-          value: "1"
-        - name: BATCH_BUCKET_SIZE
-          value: "8"
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 3006
-    targetPort: 80
-
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: audioqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: audioqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: audioqna-backend-server-deploy
-    spec:
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: audioqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: audio-qna-config
-        image: opea/audioqna:latest
-        imagePullPolicy: IfNotPresent
-        name: audioqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
---
-kind: Service
-apiVersion: v1
-metadata:
-  name: audioqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: audioqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 3008
-    targetPort: 8888
-    nodePort: 30666
--- a/AudioQnA/tests/test_gmc_on_gaudi.sh
+++ b/AudioQnA/tests/test_gmc_on_gaudi.sh
@@ -96,12 +96,12 @@ fi

 case "$1" in
    install_AudioQnA)
-        pushd AudioQnA/kubernetes/intel/hpu/gaudi/gmc
+        pushd AudioQnA/kubernetes/gmc
        install_audioqa
        popd
        ;;
    validate_AudioQnA)
-        pushd AudioQnA/kubernetes/intel/hpu/gaudi/gmc
+        pushd AudioQnA/kubernetes/gmc
        validate_audioqa
        popd
        ;;
--- a/AudioQnA/tests/test_gmc_on_xeon.sh
+++ b/AudioQnA/tests/test_gmc_on_xeon.sh
@@ -96,12 +96,12 @@ fi

 case "$1" in
    install_AudioQnA)
-        pushd AudioQnA/kubernetes/intel/cpu/xeon/gmc
+        pushd AudioQnA/kubernetes/gmc
        install_audioqa
        popd
        ;;
    validate_AudioQnA)
-        pushd AudioQnA/kubernetes/intel/cpu/xeon/gmc
+        pushd AudioQnA/kubernetes/gmc
        validate_audioqa
        popd
        ;;