# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

tgi:
  enabled: false

vllm:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
  startupProbe:
    failureThreshold: 360

  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"

  resources:
    limits:
      habana.ai/gaudi: 1
  extraCmdArgs: [
    "--tensor-parallel-size", "1",
    "--block-size", "128",
    "--max-num-seqs", "256",
    "--max-seq-len-to-capture", "2048"
  ]

llm-uservice:
  TEXTGEN_BACKEND: vLLM
  retryTimeoutSeconds: 720