Files
GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
chyundunovDatamonsters 3b0bcb80a8 DocSum - Adding files to deploy an application in the K8S environment using Helm (#1758)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-25 13:33:08 +08:00

46 lines
952 B
YAML

# Copyright (C) 2025 Advanced Micro Devices, Inc.
# Overrides for the TGI (text-generation-inference) serving component,
# configured here for an AMD ROCm accelerator.
# NOTE(review): the nesting below was reconstructed from key semantics because
# the reviewed copy had lost its indentation; confirm against the chart.
tgi:
  enabled: true
  accelDevice: "rocm"
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: "2.4.1-rocm"
  # Values are quoted so they stay strings instead of being typed as
  # integers/booleans by the YAML parser.
  # Presumably forwarded to the container as environment variables;
  # TODO confirm against the chart templates.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  USE_FLASH_ATTENTION: "false"
  FLASH_ATTENTION_RECOMPUTE: "false"
  HIP_VISIBLE_DEVICES: "0"
  MAX_BATCH_SIZE: "4"
  extraCmdArgs: ["--num-shard", "1"]
  resources:
    limits:
      # One AMD GPU; matches the single device index in HIP_VISIBLE_DEVICES
      # and the single shard in extraCmdArgs.
      amd.com/gpu: "1"
    requests:
      cpu: 1
      memory: 16Gi
  # The container runs as root (UID 0) with a writable root filesystem and
  # the extra SYS_PTRACE capability.
  # NOTE(review): presumably required by the ROCm runtime; confirm before
  # tightening.
  securityContext:
    readOnlyRootFilesystem: false
    runAsNonRoot: false
    runAsUser: 0
    capabilities:
      add:
        - SYS_PTRACE
  # Both probes wait 60 s, then poll every 5 s with a 1 s timeout and
  # tolerate 120 consecutive failures (120 x 5 s = 10 min).
  readinessProbe:
    initialDelaySeconds: 60
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
  startupProbe:
    initialDelaySeconds: 60
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
# Overrides for the llm-uservice component.
llm-uservice:
  # Selects TGI as the DocSum backend; quoted so it stays a string.
  DOCSUM_BACKEND: "TGI"
  # NOTE(review): presumably how long the service keeps retrying the backend
  # while it starts up, in seconds; confirm against the chart.
  retryTimeoutSeconds: 720
# vLLM is switched off in this values file; TGI is the enabled backend.
vllm:
  enabled: false