Files
GenAIExamples/ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml
dolpher ee0e5cc8d9 Sync value files from GenAIInfra (#1428)
All gaudi values updated with extra flags.
Added helm support for 2 new examples Text2Image and SearchQnA. Minor fix for llm-uservice.

Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2025-01-22 17:44:11 +08:00

113 lines
2.5 KiB
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Image used for the top-level application container of this chart.
# NOTE(review): presumably the ChatQnA build that includes the guardrails
# step -- confirm against the parent chart's defaults.
image:
  repository: opea/chatqna-guardrails
# guardrails related config
# Enables the guardrails microservice and names the safety model it uses.
guardrails-usvc:
  enabled: true
  # The endpoint override below is intentionally left commented out, so no
  # explicit endpoint is set by this file.
  # NOTE(review): presumably the chart derives a default endpoint -- confirm.
  # SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
  # Same model ID as tgi-guardrails.LLM_MODEL_ID in this file; keep in sync.
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
# gaudi related config
# tei running on CPU by default; uncomment the block below to run it on Gaudi
# tei:
#   accelDevice: "gaudi"
#   image:
#     repository: ghcr.io/huggingface/tei-gaudi
#     tag: "1.5.0"
#   resources:
#     limits:
#       habana.ai/gaudi: 1
#   securityContext:
#     readOnlyRootFilesystem: false
#   livenessProbe:
#     timeoutSeconds: 1
#   readinessProbe:
#     timeoutSeconds: 1
# Reranking service overrides: run the TEI Gaudi image on one Gaudi device.
teirerank:
  accelDevice: "gaudi"
  # Settings kept as quoted strings so they are not retyped by the YAML parser.
  # NOTE(review): presumably passed to the container as environment
  # variables -- confirm against the subchart templates.
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  MAX_WARMUP_SEQUENCE_LENGTH: "512"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    tag: "1.5.0"
  resources:
    limits:
      # One unit of the habana.ai/gaudi resource.
      habana.ai/gaudi: 1
  securityContext:
    readOnlyRootFilesystem: false
  # Only the probe timeouts are overridden here; other probe fields are not
  # set in this file.
  livenessProbe:
    timeoutSeconds: 1
  readinessProbe:
    timeoutSeconds: 1
# TGI instance that serves the guardrails (safety) model on one Gaudi device.
tgi-guardrails:
  enabled: true
  accelDevice: "gaudi"
  # Same model ID as guardrails-usvc.SAFETY_GUARD_MODEL_ID in this file.
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.3.1"
  resources:
    limits:
      # One unit of the habana.ai/gaudi resource.
      habana.ai/gaudi: 1
  # Numeric and boolean-looking settings are quoted on purpose so they stay
  # strings after YAML parsing.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  # Explicit empty string (not null).
  CUDA_GRAPHS: ""
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  ENABLE_HPU_GRAPH: "true"
  LIMIT_HPU_GRAPH: "true"
  USE_FLASH_ATTENTION: "true"
  FLASH_ATTENTION_RECOMPUTE: "true"
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 failures x 5 s period is roughly 10 minutes of startup budget,
    # assuming these fields map directly onto the Kubernetes probe spec.
    failureThreshold: 120
# The tgi component is switched off in this values file; vllm is the one
# enabled alongside it.
tgi:
  enabled: false
# vLLM overrides: enabled here, using the Gaudi image on one Gaudi device.
vllm:
  enabled: true
  shmSize: 1Gi
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
  resources:
    limits:
      # One unit of the habana.ai/gaudi resource.
      habana.ai/gaudi: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 180 failures x 5 s period is roughly 15 minutes of startup budget,
    # assuming these fields map directly onto the Kubernetes probe spec.
    failureThreshold: 180
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  # Quoted so the values stay strings after YAML parsing.
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  # Extra command-line arguments, written as flag/value pairs (one per line).
  # All values are quoted because every list element must be a string.
  # The last flag uses the hyphen-only spelling (--max-seq-len-to-capture)
  # instead of the previous mixed "seq_len" form.
  extraCmdArgs: [
    "--tensor-parallel-size", "1",
    "--block-size", "128",
    "--max-num-seqs", "256",
    "--max-seq-len-to-capture", "2048"
  ]