Sync values yaml file for 1.3 release (#1748)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
This commit is contained in:
9
CodeGen/kubernetes/helm/cpu-tgi-values.yaml
Normal file
9
CodeGen/kubernetes/helm/cpu-tgi-values.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
enabled: true
|
||||
vllm:
|
||||
enabled: false
|
||||
llm-uservice:
|
||||
TEXTGEN_BACKEND: TGI
|
||||
@@ -2,4 +2,8 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
enabled: false
|
||||
vllm:
|
||||
enabled: true
|
||||
llm-uservice:
|
||||
TEXTGEN_BACKEND: vLLM
|
||||
|
||||
33
CodeGen/kubernetes/helm/gaudi-tgi-values.yaml
Normal file
33
CodeGen/kubernetes/helm/gaudi-tgi-values.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
enabled: true
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.3.1"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
CUDA_GRAPHS: ""
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
ENABLE_HPU_GRAPH: "true"
|
||||
LIMIT_HPU_GRAPH: "true"
|
||||
USE_FLASH_ATTENTION: "true"
|
||||
FLASH_ATTENTION_RECOMPUTE: "true"
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
vllm:
|
||||
enabled: false
|
||||
llm-uservice:
|
||||
TEXTGEN_BACKEND: TGI
|
||||
@@ -2,32 +2,26 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
enabled: false
|
||||
|
||||
vllm:
|
||||
enabled: true
|
||||
accelDevice: "gaudi"
|
||||
LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.3.1"
|
||||
repository: opea/vllm-gaudi
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
startupProbe:
|
||||
failureThreshold: 360
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
CUDA_GRAPHS: ""
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
ENABLE_HPU_GRAPH: "true"
|
||||
LIMIT_HPU_GRAPH: "true"
|
||||
USE_FLASH_ATTENTION: "true"
|
||||
FLASH_ATTENTION_RECOMPUTE: "true"
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
extraCmdArgs: [
|
||||
"--tensor-parallel-size", "1",
|
||||
"--block-size", "128",
|
||||
"--max-num-seqs", "256",
|
||||
]
|
||||
|
||||
llm-uservice:
|
||||
TEXTGEN_BACKEND: vLLM
|
||||
retryTimeoutSeconds: 720
|
||||
|
||||
Reference in New Issue
Block a user