# Files
# GenAIExamples/CodeGen/kubernetes/helm/gaudi-values.yaml
# 2025-01-08 13:20:32 +08:00
#
# 29 lines
# 634 B
# YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Value overrides for the `tgi` section, selecting the "gaudi" accelerator
# device and the tgi-gaudi container image.
# NOTE(review): the nesting below was restored from a copy whose indentation
# had been stripped; confirm it against the chart's values schema.
tgi:
  accelDevice: "gaudi"
  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    # Quoted so the version tag is always read as a string.
    tag: "2.0.6"

  resources:
    limits:
      # Limit of one habana.ai/gaudi resource.
      habana.ai/gaudi: 1

  # Quoted so these are passed as strings rather than YAML integers.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  # Explicit empty string (not null).
  # NOTE(review): presumably this blanks a CUDA-specific default from the base
  # values — confirm against the chart.
  CUDA_GRAPHS: ""

  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1

  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1

  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # Under Kubernetes probe semantics, 120 failures at a 5 s period allow
    # about 600 s (after the initial delay) before startup is treated as failed.
    failureThreshold: 120