Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com> Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com> Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com> Co-authored-by: Artem Astafev <a.astafev@datamonsters.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chen, suyue <suyue.chen@intel.com>
41 lines
982 B
YAML
41 lines
982 B
YAML
# Copyright (C) 2025 Advanced Micro Devices, Inc.

# Configuration overrides that turn off the `tgi` component and turn on the
# `vllm` component using the ROCm image and an AMD GPU resource.
# NOTE(review): nesting below was reconstructed from key order after the
# original indentation was lost - confirm against the consuming chart's schema.

tgi:
  enabled: false

llm-uservice:
  # Backend name handed to the LLM microservice; matches the enabled `vllm`
  # section below.
  DOCSUM_BACKEND: "vLLM"
  retryTimeoutSeconds: 720

vllm:
  enabled: true
  accelDevice: "rocm"
  image:
    repository: opea/vllm-rocm
    # `latest` is a floating tag.
    # NOTE(review): consider pinning a fixed tag for reproducible deployments.
    tag: latest
  # Environment variables for the vLLM container. Values are quoted so that
  # numeric-looking settings stay strings.
  env:
    HIP_VISIBLE_DEVICES: "0"
    TENSOR_PARALLEL_SIZE: "1"
    HF_HUB_DISABLE_PROGRESS_BARS: "1"
    HF_HUB_ENABLE_HF_TRANSFER: "0"
    VLLM_USE_TRITON_FLASH_ATTN: "0"
    VLLM_WORKER_MULTIPROC_METHOD: "spawn"
    PYTORCH_JIT: "0"
    HF_HOME: "/data"
  extraCmd:
    command: [ "python3", "/workspace/api_server.py" ]
  # Arguments are listed as flag/value pairs, one pair per line.
  extraCmdArgs: [ "--swap-space", "16",
                  "--disable-log-requests",
                  "--dtype", "float16",
                  "--num-scheduler-steps", "1",
                  "--distributed-executor-backend", "mp" ]
  resources:
    limits:
      # Quoted because the resource name contains `/` and the quantity is
      # kept as a string.
      amd.com/gpu: "1"
  startupProbe:
    failureThreshold: 180
  # The container runs as UID 0 with a writable root filesystem.
  # NOTE(review): presumably required by the ROCm image - confirm before
  # tightening.
  securityContext:
    readOnlyRootFilesystem: false
    runAsNonRoot: false
    runAsUser: 0