Sync value files from GenAIInfra (#1428)

All Gaudi values updated with extra flags. Added Helm support for 2 new examples: Text2Image and SearchQnA. Minor fix for llm-uservice. Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2025-01-22 17:44:11 +08:00
parent 5c36443b11
commit ee0e5cc8d9
34 changed files with 343 additions and 1487 deletions
--- a/SearchQnA/kubernetes/helm/README.md
+++ b/SearchQnA/kubernetes/helm/README.md
@@ -0,0 +1,18 @@
+# Deploy SearchQnA on a Kubernetes cluster
+
+- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
+- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
+
+## Deploy on Xeon
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install searchqna oci://ghcr.io/opea-project/charts/searchqna --set global.HUGGINGFACEHUB_API_TOKEN="${HFTOKEN}" -f cpu-values.yaml
+```
+
+## Deploy on Gaudi
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install searchqna oci://ghcr.io/opea-project/charts/searchqna --set global.HUGGINGFACEHUB_API_TOKEN="${HFTOKEN}" -f gaudi-values.yaml
+```
--- a/SearchQnA/kubernetes/helm/cpu-values.yaml
+++ b/SearchQnA/kubernetes/helm/cpu-values.yaml
@@ -0,0 +1,7 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:  # CPU (Xeon) overrides for the `tgi` values section
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # same model ID as llm_uservice.LLM_MODEL_ID below
+llm_uservice:  # CPU (Xeon) overrides for the `llm_uservice` values section
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # NOTE(review): presumably must match tgi.LLM_MODEL_ID — confirm
--- a/SearchQnA/kubernetes/helm/gaudi-values.yaml
+++ b/SearchQnA/kubernetes/helm/gaudi-values.yaml
@@ -0,0 +1,50 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:  # Gaudi overrides for the `tgi` values section
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.3.1"  # quoted so the version stays a string
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one habana.ai/gaudi resource
+  MAX_INPUT_LENGTH: "2048"  # quoted: passed as a string, not an integer
+  MAX_TOTAL_TOKENS: "4096"  # quoted: passed as a string, not an integer
+  CUDA_GRAPHS: ""  # explicit empty string; NOTE(review): presumably turns CUDA graphs off — confirm
+  HF_HUB_DISABLE_PROGRESS_BARS: 1  # unquoted integer, unlike the quoted MAX_* values above
+  HF_HUB_ENABLE_HF_TRANSFER: 0  # unquoted integer zero; NOTE(review): confirm the chart template does not drop a falsy 0
+  ENABLE_HPU_GRAPH: true  # YAML boolean; NOTE(review): confirm the chart renders it as the string "true"
+  LIMIT_HPU_GRAPH: true  # YAML boolean (see note on ENABLE_HPU_GRAPH)
+  USE_FLASH_ATTENTION: true  # YAML boolean (see note on ENABLE_HPU_GRAPH)
+  FLASH_ATTENTION_RECOMPUTE: true  # YAML boolean (see note on ENABLE_HPU_GRAPH)
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 120  # 120 failures x 5s period: about 600s startup budget, assuming standard Kubernetes probe semantics
+
+tei:  # Gaudi overrides for the `tei` values section
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tei-gaudi
+    tag: "1.5.0"  # quoted so the version stays a string
+  OMPI_MCA_btl_vader_single_copy_mechanism: none  # plain string "none"; YAML does not parse this as null
+  MAX_WARMUP_SEQUENCE_LENGTH: 512  # unquoted integer
+  securityContext:
+    readOnlyRootFilesystem: false  # NOTE(review): presumably this image needs a writable root filesystem — confirm
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one habana.ai/gaudi resource
+  livenessProbe:
+    timeoutSeconds: 1  # only the timeout is overridden here; other probe fields are not set in this file
+  readinessProbe:
+    timeoutSeconds: 1  # only the timeout is overridden here; other probe fields are not set in this file