Files
GenAIExamples/AgentQnA/kubernetes/helm/gaudi-values.yaml
2025-04-15 14:27:12 +08:00

36 lines
1.0 KiB
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values
tgi:
enabled: false
vllm:
enabled: true
accelDevice: "gaudi"
image:
repository: opea/vllm-gaudi
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Llama-3.3-70B-Instruct"
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
VLLM_SKIP_WARMUP: true
shmSize: 16Gi
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Llama-3.3-70B-Instruct"
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Llama-3.3-70B-Instruct"
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Llama-3.3-70B-Instruct"