# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Value overrides that select the Gaudi accelerator variants of the `tgi`
# and `tei` services.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had been collapsed onto a single comment line. Upper-case keys are
# placed directly under their service; confirm against the consuming chart.

tgi:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.3.1"
  resources:
    limits:
      habana.ai/gaudi: 1

  # Upper-case settings: presumably forwarded to the container as environment
  # variables -- TODO confirm in the chart templates. Values are kept exactly
  # as authored (including their quoted/unquoted form).
  MAX_INPUT_LENGTH: "2048"
  MAX_TOTAL_TOKENS: "4096"
  CUDA_GRAPHS: ""
  HF_HUB_DISABLE_PROGRESS_BARS: 1
  HF_HUB_ENABLE_HF_TRANSFER: 0
  ENABLE_HPU_GRAPH: true
  LIMIT_HPU_GRAPH: true
  USE_FLASH_ATTENTION: true
  FLASH_ATTENTION_RECOMPUTE: true

  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  # With periodSeconds 5 and failureThreshold 120, startup is given roughly
  # 5 * 120 = 600 seconds of probing before it counts as failed.
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120

tei:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    tag: "1.5.0"

  # Presumably environment variables for the container -- TODO confirm.
  OMPI_MCA_btl_vader_single_copy_mechanism: none
  MAX_WARMUP_SEQUENCE_LENGTH: 512

  securityContext:
    readOnlyRootFilesystem: false
  resources:
    limits:
      habana.ai/gaudi: 1

  # Only the probe timeout is overridden here; other probe fields are not set
  # in this file.
  livenessProbe:
    timeoutSeconds: 1
  readinessProbe:
    timeoutSeconds: 1