# Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
# Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
# Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
# Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
# Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
# Co-authored-by: chen, suyue <suyue.chen@intel.com>
# 46 lines, 952 B, YAML
# Copyright (C) 2025 Advanced Micro Devices, Inc.
# Settings for the `tgi` section: enables it and selects the ROCm accelerator
# together with the ROCm-tagged text-generation-inference image.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# verify it against the consuming chart's values schema.
tgi:
  enabled: true
  accelDevice: "rocm"
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: "2.4.1-rocm"
  # Upper-case keys are quoted so they stay strings rather than being parsed
  # as integers/booleans. Presumably forwarded to the container as
  # environment variables; confirm against the chart templates.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  USE_FLASH_ATTENTION: "false"
  FLASH_ATTENTION_RECOMPUTE: "false"
  HIP_VISIBLE_DEVICES: "0"
  MAX_BATCH_SIZE: "4"
  # Extra command-line arguments: a single shard.
  extraCmdArgs: ["--num-shard", "1"]
  resources:
    limits:
      # One AMD GPU; quoted because the value is written as a string.
      amd.com/gpu: "1"
    requests:
      cpu: 1
      memory: 16Gi
  # Runs as root (UID 0) with a writable root filesystem and the SYS_PTRACE
  # capability added. NOTE(review): presumably needed by the ROCm runtime;
  # confirm before tightening.
  securityContext:
    readOnlyRootFilesystem: false
    runAsNonRoot: false
    runAsUser: 0
    capabilities:
      add:
        - SYS_PTRACE
  # Both probes wait 60s, then poll every 5s with a 1s timeout and tolerate
  # up to 120 failures (about 10 minutes of polling at a 5s period).
  readinessProbe:
    initialDelaySeconds: 60
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
  startupProbe:
    initialDelaySeconds: 60
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120

# Settings for the `llm-uservice` section.
llm-uservice:
  # Backend selector set to "TGI", consistent with `tgi.enabled: true` in
  # this file.
  DOCSUM_BACKEND: "TGI"
  # Retry timeout in seconds (720s = 12 minutes). NOTE(review): presumably
  # how long the service keeps retrying the backend; confirm in the chart.
  retryTimeoutSeconds: 720

# The `vllm` section is switched off in this values file.
vllm:
  enabled: false