Sync value files from GenAIInfra (#1428)
All gaudi values updated with extra flags. Added helm support for 2 new examples Text2Image and SearchQnA. Minor fix for llm-uservice. Signed-off-by: Dolpher Du <dolpher.du@intel.com>
This commit is contained in:
18
SearchQnA/kubernetes/helm/README.md
Normal file
18
SearchQnA/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Deploy SearchQnA on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install searchqna oci://ghcr.io/opea-project/charts/searchqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install searchqna oci://ghcr.io/opea-project/charts/searchqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
7
SearchQnA/kubernetes/helm/cpu-values.yaml
Normal file
7
SearchQnA/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# CPU (Xeon) overrides for the SearchQnA chart.
# Both sections are set to the same model ID; each LLM_MODEL_ID must stay
# nested under its own section, otherwise the two entries collide as
# duplicate top-level keys.
tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

llm_uservice:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
50
SearchQnA/kubernetes/helm/gaudi-values.yaml
Normal file
50
SearchQnA/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Gaudi overrides for the `tgi` section of the SearchQnA chart.
tgi:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.3.1"
  resources:
    limits:
      # One Gaudi device per pod.
      habana.ai/gaudi: 1

  # Environment-style settings. They are quoted so that they are plain
  # strings rather than YAML integers/booleans.
  # NOTE(review): presumably rendered into container env vars by the chart;
  # confirm the templates do not rely on the unquoted 0/1/true typing.
  MAX_INPUT_LENGTH: "2048"
  MAX_TOTAL_TOKENS: "4096"
  CUDA_GRAPHS: ""
  HF_HUB_DISABLE_PROGRESS_BARS: "1"
  HF_HUB_ENABLE_HF_TRANSFER: "0"
  ENABLE_HPU_GRAPH: "true"
  LIMIT_HPU_GRAPH: "true"
  USE_FLASH_ATTENTION: "true"
  FLASH_ATTENTION_RECOMPUTE: "true"

  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 failures at a 5 s period gives roughly 10 minutes for startup.
    failureThreshold: 120
|
||||
|
||||
# Gaudi overrides for the `tei` section of the SearchQnA chart.
tei:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    tag: "1.5.0"

  # Environment-style settings, quoted so they are plain strings.
  # NOTE(review): presumably rendered into container env vars by the chart;
  # confirm against the tei chart templates.
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  MAX_WARMUP_SEQUENCE_LENGTH: "512"

  securityContext:
    # Real boolean: the root filesystem is left writable for this image.
    readOnlyRootFilesystem: false
  resources:
    limits:
      # One Gaudi device per pod.
      habana.ai/gaudi: 1
  livenessProbe:
    timeoutSeconds: 1
  readinessProbe:
    timeoutSeconds: 1
|
||||
Reference in New Issue
Block a user