DocSum - Adding files to deploy an application in the K8S environment using Helm (#1758)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com> Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com> Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com> Co-authored-by: Artem Astafev <a.astafev@datamonsters.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chen, suyue <suyue.chen@intel.com>
This commit is contained in:
committed by
GitHub
parent
ccc145ea1a
commit
3b0bcb80a8
@@ -16,3 +16,150 @@ helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGI
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on AMD ROCm using Helm charts from the binary Helm repository
|
||||
|
||||
```bash
|
||||
mkdir ~/docsum-k8s-install && cd ~/docsum-k8s-install
|
||||
```
|
||||
|
||||
### Cloning repos
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
### Go to the installation directory
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/kubernetes/helm
|
||||
```
|
||||
|
||||
### Setting system variables
|
||||
|
||||
```bash
|
||||
export HFTOKEN="your_huggingface_token"
|
||||
export MODELDIR="/mnt/opea-models"
|
||||
export MODELNAME="Intel/neural-chat-7b-v3-3"
|
||||
```
|
||||
|
||||
### Setting variables in Values files
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- TENSOR_PARALLEL_SIZE - must match the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
#### If ROCm TGI used
|
||||
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- extraCmdArgs: [ "--num-shard","1" ] - replace "1" with the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
### Installing the Helm Chart
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values rocm-values.yaml
|
||||
```
|
||||
|
||||
#### If ROCm TGI used
|
||||
```bash
|
||||
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on AMD ROCm using Helm charts from Git repositories
|
||||
|
||||
### Creating working dirs
|
||||
|
||||
```bash
|
||||
mkdir ~/docsum-k8s-install && cd ~/docsum-k8s-install
|
||||
```
|
||||
|
||||
### Cloning repos
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
git clone https://github.com/opea-project/GenAIInfra.git
|
||||
```
|
||||
|
||||
### Go to the installation directory
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/kubernetes/helm
|
||||
```
|
||||
|
||||
### Setting system variables
|
||||
|
||||
```bash
|
||||
export HFTOKEN="your_huggingface_token"
|
||||
export MODELDIR="/mnt/opea-models"
|
||||
export MODELNAME="Intel/neural-chat-7b-v3-3"
|
||||
```
|
||||
|
||||
### Setting variables in Values files
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- TENSOR_PARALLEL_SIZE - must match the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
#### If ROCm TGI used
|
||||
|
||||
```bash
|
||||
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
|
||||
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
|
||||
- extraCmdArgs: [ "--num-shard","1" ] - replace "1" with the number of GPUs used
|
||||
- resources:
|
||||
limits:
|
||||
amd.com/gpu: "1" - replace "1" with the number of GPUs used
|
||||
|
||||
### Installing the Helm Chart
|
||||
|
||||
#### If ROCm vLLM used
|
||||
```bash
|
||||
cd ~/docsum-k8s-install/GenAIInfra/helm-charts
|
||||
./update_dependency.sh
|
||||
helm dependency update docsum
|
||||
helm upgrade --install docsum docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values ../../GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
|
||||
```
|
||||
|
||||
#### If ROCm TGI used
|
||||
```bash
|
||||
cd ~/docsum-k8s-install/GenAIInfra/helm-charts
|
||||
./update_dependency.sh
|
||||
helm dependency update docsum
|
||||
helm upgrade --install docsum docsum \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--values ../../GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
|
||||
```
|
||||
|
||||
45
DocSum/kubernetes/helm/rocm-tgi-values.yaml
Normal file
45
DocSum/kubernetes/helm/rocm-tgi-values.yaml
Normal file
@@ -0,0 +1,45 @@
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
tgi:
|
||||
enabled: true
|
||||
accelDevice: "rocm"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/text-generation-inference
|
||||
tag: "2.4.1-rocm"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
USE_FLASH_ATTENTION: "false"
|
||||
FLASH_ATTENTION_RECOMPUTE: "false"
|
||||
HIP_VISIBLE_DEVICES: "0"
|
||||
MAX_BATCH_SIZE: "4"
|
||||
extraCmdArgs: [ "--num-shard","1" ]
|
||||
resources:
|
||||
limits:
|
||||
amd.com/gpu: "1"
|
||||
requests:
|
||||
cpu: 1
|
||||
memory: 16Gi
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: false
|
||||
runAsUser: 0
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_PTRACE
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
startupProbe:
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
|
||||
llm-uservice:
|
||||
DOCSUM_BACKEND: "TGI"
|
||||
retryTimeoutSeconds: 720
|
||||
|
||||
vllm:
|
||||
enabled: false
|
||||
40
DocSum/kubernetes/helm/rocm-values.yaml
Normal file
40
DocSum/kubernetes/helm/rocm-values.yaml
Normal file
@@ -0,0 +1,40 @@
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
tgi:
|
||||
enabled: false
|
||||
|
||||
llm-uservice:
|
||||
DOCSUM_BACKEND: "vLLM"
|
||||
retryTimeoutSeconds: 720
|
||||
|
||||
vllm:
|
||||
enabled: true
|
||||
accelDevice: "rocm"
|
||||
image:
|
||||
repository: opea/vllm-rocm
|
||||
tag: latest
|
||||
env:
|
||||
HIP_VISIBLE_DEVICES: "0"
|
||||
TENSOR_PARALLEL_SIZE: "1"
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: "1"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0"
|
||||
VLLM_USE_TRITON_FLASH_ATTN: "0"
|
||||
VLLM_WORKER_MULTIPROC_METHOD: "spawn"
|
||||
PYTORCH_JIT: "0"
|
||||
HF_HOME: "/data"
|
||||
extraCmd:
|
||||
command: [ "python3", "/workspace/api_server.py" ]
|
||||
extraCmdArgs: [ "--swap-space", "16",
|
||||
"--disable-log-requests",
|
||||
"--dtype", "float16",
|
||||
"--num-scheduler-steps", "1",
|
||||
"--distributed-executor-backend", "mp" ]
|
||||
resources:
|
||||
limits:
|
||||
amd.com/gpu: "1"
|
||||
startupProbe:
|
||||
failureThreshold: 180
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: false
|
||||
runAsUser: 0
|
||||
Reference in New Issue
Block a user