Added the tuned TGI params
This commit is contained in:
@@ -26,7 +26,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
||||
if num_nodes == 1:
|
||||
replicas = [
|
||||
{"name": "chatqna-backend-server-deploy", "replicas": 2},
|
||||
{"name": "embedding-dependency-deploy", "replicas": 2},
|
||||
{"name": "embedding-dependency-deploy", "replicas": 1},
|
||||
{"name": "reranking-dependency-deploy", "replicas": 1} if with_rerank else None,
|
||||
{"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
|
||||
{"name": "dataprep-deploy", "replicas": 1},
|
||||
@@ -69,6 +69,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
||||
{
|
||||
"name": "llm-dependency-deploy",
|
||||
"args": [
|
||||
{"name": "--model-id", "value": '$(LLM_MODEL_ID)'},
|
||||
{"name": "--max-input-length", "value": 1280},
|
||||
{"name": "--max-total-tokens", "value": 2048},
|
||||
{"name": "--max-batch-total-tokens", "value": 35536},
|
||||
|
||||
@@ -31,9 +31,17 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: {{ $global.config.CONFIG_MAP_NAME }}
|
||||
|
||||
{{- $args := $microservice.args }}
|
||||
{{- range $podSpec := $global.podSpecs }}
|
||||
{{- if eq $podSpec.name $microservice.name }}
|
||||
{{- $args = $podSpec.args | default $microservice.args }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $microservice.args }}
|
||||
args:
|
||||
{{- range $arg := $microservice.args }}
|
||||
{{- range $arg := $args }}
|
||||
{{- if $arg.name }}
|
||||
- {{ $arg.name }}
|
||||
{{- end }}
|
||||
|
||||
Reference in New Issue
Block a user