added the tuned tgi params
This commit is contained in:
@@ -26,7 +26,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
if num_nodes == 1:
|
if num_nodes == 1:
|
||||||
replicas = [
|
replicas = [
|
||||||
{"name": "chatqna-backend-server-deploy", "replicas": 2},
|
{"name": "chatqna-backend-server-deploy", "replicas": 2},
|
||||||
{"name": "embedding-dependency-deploy", "replicas": 2},
|
{"name": "embedding-dependency-deploy", "replicas": 1},
|
||||||
{"name": "reranking-dependency-deploy", "replicas": 1} if with_rerank else None,
|
{"name": "reranking-dependency-deploy", "replicas": 1} if with_rerank else None,
|
||||||
{"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
|
{"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
|
||||||
{"name": "dataprep-deploy", "replicas": 1},
|
{"name": "dataprep-deploy", "replicas": 1},
|
||||||
@@ -69,6 +69,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
{
|
{
|
||||||
"name": "llm-dependency-deploy",
|
"name": "llm-dependency-deploy",
|
||||||
"args": [
|
"args": [
|
||||||
|
{"name": "--model-id", "value": '$(LLM_MODEL_ID)'},
|
||||||
{"name": "--max-input-length", "value": 1280},
|
{"name": "--max-input-length", "value": 1280},
|
||||||
{"name": "--max-total-tokens", "value": 2048},
|
{"name": "--max-total-tokens", "value": 2048},
|
||||||
{"name": "--max-batch-total-tokens", "value": 35536},
|
{"name": "--max-batch-total-tokens", "value": 35536},
|
||||||
|
|||||||
@@ -31,9 +31,17 @@ spec:
|
|||||||
- envFrom:
|
- envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: {{ $global.config.CONFIG_MAP_NAME }}
|
name: {{ $global.config.CONFIG_MAP_NAME }}
|
||||||
|
|
||||||
|
{{- $args := $microservice.args }}
|
||||||
|
{{- range $podSpec := $global.podSpecs }}
|
||||||
|
{{- if eq $podSpec.name $microservice.name }}
|
||||||
|
{{- $args = $podSpec.args | default $microservice.args }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{- if $microservice.args }}
|
{{- if $microservice.args }}
|
||||||
args:
|
args:
|
||||||
{{- range $arg := $microservice.args }}
|
{{- range $arg := $args }}
|
||||||
{{- if $arg.name }}
|
{{- if $arg.name }}
|
||||||
- {{ $arg.name }}
|
- {{ $arg.name }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|||||||
Reference in New Issue
Block a user