# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in the heaviest components to improve performance
# by overriding their subchart values.

# Top-level chart settings: the application image and the pipeline variant.
image:
  repository: opea/chatqna
# Quoted so the value is always read as a string.
CHATQNA_TYPE: "CHATQNA_GUARDRAILS"
# guardrails related config
guardrails-usvc:
  enabled: true
  # NOTE(review): Helm does not expand template expressions inside values
  # files by itself; this presumably relies on the subchart rendering the
  # value with `tpl` -- confirm against the subchart templates.
  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-vllm-guardrails"
  # Same model ID as vllm-guardrails.LLM_MODEL_ID in this file.
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  retryTimeoutSeconds: 720

# gaudi related config
# tei runs on CPU by default; the commented-out overrides below show the
# Gaudi variant.
# tei:
#   accelDevice: "gaudi"
#   image:
#     repository: ghcr.io/huggingface/tei-gaudi
#     tag: 1.5.0
#   resources:
#     limits:
#       habana.ai/gaudi: 1
#   securityContext:
#     readOnlyRootFilesystem: false

# teirerank subchart overrides: use the tei-gaudi image and request one
# habana.ai/gaudi device.
teirerank:
  accelDevice: "gaudi"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  # Quoted so the value stays a string rather than an integer.
  MAX_WARMUP_SEQUENCE_LENGTH: "512"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    # Quoted so the tag is always read as a string.
    tag: "1.5.0"
  resources:
    limits:
      habana.ai/gaudi: 1
  securityContext:
    readOnlyRootFilesystem: false
  readinessProbe:
    timeoutSeconds: 1

# vllm-guardrails subchart overrides: vLLM on Gaudi serving the guard model.
vllm-guardrails:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
  # Same model ID as guardrails-usvc.SAFETY_GUARD_MODEL_ID in this file.
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  resources:
    limits:
      habana.ai/gaudi: 1
  # Each flag is followed by its value as a separate list item; every item
  # is quoted so numeric values stay strings.
  extraCmdArgs:
    - "--tensor-parallel-size"
    - "1"
    - "--block-size"
    - "128"
    - "--max-num-seqs"
    - "256"
    - "--max-seq-len-to-capture"
    - "2048"
  startupProbe:
    failureThreshold: 360
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"

# tgi is switched off in this file; vllm is the component enabled instead.
tgi:
  enabled: false
# vllm subchart overrides: vLLM on Gaudi with one habana.ai/gaudi device.
vllm:
  enabled: true
  shmSize: 1Gi
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
  resources:
    limits:
      habana.ai/gaudi: 1
  startupProbe:
    failureThreshold: 360

  # Quoted so the values are passed as strings, not YAML booleans.
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"

  # Each flag is followed by its value as a separate list item; every item
  # is quoted so numeric values stay strings.
  extraCmdArgs:
    - "--tensor-parallel-size"
    - "1"
    - "--block-size"
    - "128"
    - "--max-num-seqs"
    - "256"
    - "--max-seq-len-to-capture"
    - "2048"