# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Gaudi-accelerated value overrides.
# Speeds up inferencing in the heaviest components by overriding the
# values of their subcharts.

# TGI is switched off; vLLM (below) is the enabled serving backend.
tgi:
  enabled: false

vllm:
  enabled: true
  accelDevice: "gaudi"

  image:
    repository: opea/vllm-gaudi

  # Number of failed startup probes tolerated before the container is
  # treated as failed.
  # NOTE(review): presumably this high to cover slow model start-up on
  # the accelerator; the probe period is set elsewhere - confirm.
  startupProbe:
    failureThreshold: 360

  # Kept as quoted strings on purpose so they are never retyped as
  # booleans/nulls by the YAML parser.
  # NOTE(review): presumably exported to the container as environment
  # variables by the vllm chart - confirm against its templates.
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"

  # Limit the workload to a single habana.ai/gaudi resource.
  resources:
    limits:
      habana.ai/gaudi: 1

  # Extra command-line arguments, listed as flag / value pairs.
  # Every value is a quoted string, so numbers are passed as text.
  extraCmdArgs:
    # NOTE(review): presumably meant to match the gaudi limit above.
    - "--tensor-parallel-size"
    - "1"
    - "--block-size"
    - "128"
    - "--max-num-seqs"
    - "256"
    - "--max-seq-len-to-capture"
    - "2048"

# Settings for the llm-uservice subchart: backend name and retry timeout.
llm-uservice:
  TEXTGEN_BACKEND: vLLM
  retryTimeoutSeconds: 720