# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Value overrides for the `tgi` section when running on a "gaudi" accelerator.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped — confirm it against the consuming
# chart's values schema.
tgi:
  accelDevice: "gaudi"
  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    # Quoted so the tag stays a string rather than being type-inferred.
    tag: "2.0.6"
  resources:
    limits:
      # One unit of the `habana.ai/gaudi` extended resource.
      habana.ai/gaudi: 1
  # Numeric settings are deliberately quoted strings, not integers.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  # Explicit empty string (a bare `CUDA_GRAPHS:` would parse as null).
  # NOTE(review): presumably clears a CUDA-specific default that does not
  # apply on Gaudi — confirm against the chart.
  CUDA_GRAPHS: ""
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 failures x 5 s period is about 10 minutes of startup allowance,
    # assuming these fields map onto Kubernetes probe settings.
    failureThreshold: 120