Files
GenAIExamples/VisualQnA/kubernetes/helm/gaudi-values.yaml
2025-04-08 22:39:40 +08:00

25 lines
707 B
YAML

# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# vLLM serving settings for Intel Gaudi: enables the vllm section and points it
# at the Gaudi image and accelerator device.
vllm:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
    tag: "latest"
  # Same model ID as the lvm-uservice section of this file.
  LLM_MODEL_ID: llava-hf/llava-1.5-7b-hf
  # The settings below look like environment variables handed to the serving
  # container -- NOTE(review): confirm against the chart templates.
  VLLM_SKIP_WARMUP: true
  OMPI_MCA_btl_vader_single_copy_mechanism: none
  # Deliberately a quoted string, not a YAML boolean.
  PT_HPUGRAPH_DISABLE_TENSOR_CACHE: "false"
  # Extra command-line arguments: tensor-parallel size of 1 and the LLaVA chat
  # template.
  extraCmdArgs:
    - "--tensor-parallel-size"
    - "1"
    - "--chat-template"
    - "examples/template_llava.jinja"
  resources:
    limits:
      # One unit of the habana.ai/gaudi resource.
      habana.ai/gaudi: 1
# The tgi section is switched off in this values file; the vllm section is the
# one that is enabled.
tgi:
  enabled: false
# LVM microservice settings: select the vLLM backend and pin the model.
lvm-uservice:
  LVM_BACKEND: "vLLM"
  # The default model is not stable on Gaudi, so the older LLaVA 1.5 model is
  # used instead. See https://github.com/HabanaAI/vllm-fork/issues/841
  LLM_MODEL_ID: llava-hf/llava-1.5-7b-hf