Files
GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
chyundunovDatamonsters 3b0bcb80a8 DocSum - Adding files to deploy an application in the K8S environment using Helm (#1758)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-25 13:33:08 +08:00

41 lines
982 B
YAML

# Copyright (C) 2025 Advanced Micro Devices, Inc.
# Disable the TGI component for this deployment.
# NOTE(review): presumably the TGI serving subchart - confirm against the
# parent chart's dependencies.
tgi:
  enabled: false
# Settings for the llm-uservice component.
llm-uservice:
  # Use vLLM as the DocSum backend.
  DOCSUM_BACKEND: "vLLM"
  # NOTE(review): presumably how long (in seconds) the service keeps retrying
  # while the backend is not yet ready - confirm against the subchart.
  retryTimeoutSeconds: 720
# Enable the vLLM component and configure it for a ROCm accelerator.
vllm:
  enabled: true
  accelDevice: "rocm"
  image:
    repository: opea/vllm-rocm
    # NOTE(review): `latest` is a mutable tag; consider pinning a fixed
    # version for reproducible deployments.
    tag: latest
  # Environment variables for the vLLM container. All values are quoted so
  # they stay strings rather than being parsed as integers.
  env:
    HIP_VISIBLE_DEVICES: "0"
    TENSOR_PARALLEL_SIZE: "1"
    HF_HUB_DISABLE_PROGRESS_BARS: "1"
    HF_HUB_ENABLE_HF_TRANSFER: "0"
    VLLM_USE_TRITON_FLASH_ATTN: "0"
    VLLM_WORKER_MULTIPROC_METHOD: "spawn"
    PYTORCH_JIT: "0"
    HF_HOME: "/data"
  # Command used to start the API server.
  extraCmd:
    command: ["python3", "/workspace/api_server.py"]
  # Additional command-line arguments; each option is immediately followed by
  # its value, if it takes one.
  extraCmdArgs:
    - "--swap-space"
    - "16"
    - "--disable-log-requests"
    - "--dtype"
    - "float16"
    - "--num-scheduler-steps"
    - "1"
    - "--distributed-executor-backend"
    - "mp"
  # Limit the workload to one AMD GPU.
  resources:
    limits:
      amd.com/gpu: "1"
  # Allow up to 180 failed startup probes before the container is considered
  # failed.
  startupProbe:
    failureThreshold: 180
  # The container runs as root (UID 0) with a writable root filesystem.
  # NOTE(review): presumably required by the ROCm image - confirm before
  # tightening.
  securityContext:
    readOnlyRootFilesystem: false
    runAsNonRoot: false
    runAsUser: 0