Files
GenAIExamples/SearchQnA/kubernetes/helm/gaudi-values.yaml
2025-04-08 22:39:40 +08:00

45 lines
1003 B
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Value overrides for the "tgi" section, selecting the Gaudi accelerator build.
# NOTE(review): nesting was reconstructed from key semantics because the
# original indentation was lost — confirm against the chart's values schema.
tgi:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    # Quoted so the version is always read as a string.
    tag: "2.3.1"
  resources:
    limits:
      # Limit of one habana.ai/gaudi device resource.
      habana.ai/gaudi: 1
  # Upper-case settings keep their original scalar types (string, integer,
  # boolean) on purpose: presumably the chart templates consume them, and a
  # template truthiness check treats 0 and "0" differently — verify before
  # changing any type.
  MAX_INPUT_LENGTH: "2048"
  MAX_TOTAL_TOKENS: "4096"
  # Explicit empty string rather than a bare (null) value.
  CUDA_GRAPHS: ""
  HF_HUB_DISABLE_PROGRESS_BARS: 1
  HF_HUB_ENABLE_HF_TRANSFER: 0
  ENABLE_HPU_GRAPH: true
  LIMIT_HPU_GRAPH: true
  USE_FLASH_ATTENTION: true
  FLASH_ATTENTION_RECOMPUTE: true
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 allowed failures at a 5-second period: roughly ten minutes of
    # startup budget under standard Kubernetes probe semantics.
    failureThreshold: 120
# Value overrides for the "tei" section, selecting the Gaudi accelerator build.
# NOTE(review): nesting was reconstructed from key semantics because the
# original indentation was lost — confirm against the chart's values schema.
tei:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    # Quoted so the version is always read as a string.
    tag: "1.5.0"
  # Upper-case settings keep their original scalar types; presumably the
  # chart templates consume them — verify before changing any type.
  OMPI_MCA_btl_vader_single_copy_mechanism: none
  MAX_WARMUP_SEQUENCE_LENGTH: 512
  securityContext:
    # Root filesystem is left writable for this container.
    readOnlyRootFilesystem: false
  resources:
    limits:
      # Limit of one habana.ai/gaudi device resource.
      habana.ai/gaudi: 1
  readinessProbe:
    # Only the timeout is overridden here; other probe fields are not set
    # in this file.
    timeoutSeconds: 1