removed spec
This commit is contained in:
@@ -5,53 +5,46 @@ HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
|||||||
|
|
||||||
podSpecs:
|
podSpecs:
|
||||||
- name: chatqna-backend-server-deploy
|
- name: chatqna-backend-server-deploy
|
||||||
spec:
|
replicas: 2
|
||||||
replicas: 2
|
resources:
|
||||||
resources:
|
limits:
|
||||||
limits:
|
cpu: "8"
|
||||||
cpu: "8"
|
memory: "8000Mi"
|
||||||
memory: "8000Mi"
|
requests:
|
||||||
requests:
|
cpu: "8"
|
||||||
cpu: "8"
|
memory: "8000Mi"
|
||||||
memory: "8000Mi"
|
|
||||||
|
|
||||||
- name: embedding-dependency-deploy
|
- name: embedding-dependency-deploy
|
||||||
spec:
|
replicas: 1
|
||||||
replicas: 1
|
resources:
|
||||||
resources:
|
limits:
|
||||||
limits:
|
cpu: "80"
|
||||||
cpu: "80"
|
memory: "20000Mi"
|
||||||
memory: "20000Mi"
|
requests:
|
||||||
requests:
|
cpu: "80"
|
||||||
cpu: "80"
|
memory: "20000Mi"
|
||||||
memory: "20000Mi"
|
|
||||||
|
|
||||||
- name: reranking-dependency-deploy
|
- name: reranking-dependency-deploy
|
||||||
spec:
|
replicas: 1
|
||||||
replicas: 1
|
resources:
|
||||||
resources:
|
limits:
|
||||||
limits:
|
habana.ai/gaudi: 1
|
||||||
habana.ai/gaudi: 1
|
|
||||||
|
|
||||||
- name: llm-dependency-deploy
|
- name: llm-dependency-deploy
|
||||||
spec:
|
replicas: 7
|
||||||
replicas: 7
|
resources:
|
||||||
resources:
|
limits:
|
||||||
limits:
|
habana.ai/gaudi: 1
|
||||||
habana.ai/gaudi: 1
|
|
||||||
|
|
||||||
- name: dataprep-deploy
|
- name: dataprep-deploy
|
||||||
spec:
|
replicas: 1
|
||||||
replicas: 1
|
|
||||||
|
|
||||||
- name: vector-db
|
- name: vector-db
|
||||||
spec:
|
replicas: 1
|
||||||
replicas: 1
|
|
||||||
|
|
||||||
- name: retriever-deploy
|
- name: retriever-deploy
|
||||||
spec:
|
replicas: 2
|
||||||
replicas: 2
|
resources:
|
||||||
resources:
|
requests:
|
||||||
requests:
|
cpu: "4"
|
||||||
cpu: "4"
|
memory: "4000Mi"
|
||||||
memory: "4000Mi"
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ spec:
|
|||||||
{{- $replicas := $deployment.replicas }}
|
{{- $replicas := $deployment.replicas }}
|
||||||
{{- range $podSpec := $global.podSpecs }}
|
{{- range $podSpec := $global.podSpecs }}
|
||||||
{{- if eq $podSpec.name $deployment.name }}
|
{{- if eq $podSpec.name $deployment.name }}
|
||||||
{{- $replicas = $podSpec.spec.replicas | default $deployment.replicas }}
|
{{- $replicas = $podSpec.replicas | default $deployment.replicas }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
replicas: {{ $replicas }}
|
replicas: {{ $replicas }}
|
||||||
@@ -31,9 +31,9 @@ spec:
|
|||||||
- envFrom:
|
- envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: {{ $global.config.CONFIG_MAP_NAME }}
|
name: {{ $global.config.CONFIG_MAP_NAME }}
|
||||||
{{- if $deployment.spec.args }}
|
{{- if $deployment.args }}
|
||||||
args:
|
args:
|
||||||
{{- range $arg := $deployment.spec.args }}
|
{{- range $arg := $deployment.args }}
|
||||||
{{- if $arg.name }}
|
{{- if $arg.name }}
|
||||||
- {{ $arg.name }}
|
- {{ $arg.name }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
@@ -43,9 +43,9 @@ spec:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{- if $deployment.spec.env }}
|
{{- if $deployment.env }}
|
||||||
env:
|
env:
|
||||||
{{- range $env := $deployment.spec.env }}
|
{{- range $env := $deployment.env }}
|
||||||
- name: {{ $env.name }}
|
- name: {{ $env.name }}
|
||||||
value: "{{ $env.value }}"
|
value: "{{ $env.value }}"
|
||||||
{{- end }}
|
{{- end }}
|
||||||
@@ -55,20 +55,20 @@ spec:
|
|||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: {{ $deployment.name }}
|
name: {{ $deployment.name }}
|
||||||
|
|
||||||
{{- if $deployment.spec.ports }}
|
{{- if $deployment.ports }}
|
||||||
ports:
|
ports:
|
||||||
{{- range $port := $deployment.spec.ports }}
|
{{- range $port := $deployment.ports }}
|
||||||
{{- range $port_name, $port_id := $port }}
|
{{- range $port_name, $port_id := $port }}
|
||||||
- {{ $port_name }}: {{ $port_id }}
|
- {{ $port_name }}: {{ $port_id }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{- $resources := $deployment.spec.resources }}
|
{{- $resources := $deployment.resources }}
|
||||||
{{- range $podSpec := $global.podSpecs }}
|
{{- range $podSpec := $global.podSpecs }}
|
||||||
{{- if eq $podSpec.name $deployment.name }}
|
{{- if eq $podSpec.name $deployment.name }}
|
||||||
{{- if $podSpec.spec.resources }}
|
{{- if $podSpec.resources }}
|
||||||
{{- $resources = $podSpec.spec.resources }}
|
{{- $resources = $podSpec.resources }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
@@ -83,9 +83,9 @@ spec:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{- if $deployment.spec.volumeMounts }}
|
{{- if $deployment.volumeMounts }}
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
{{- range $volumeMount := $deployment.spec.volumeMounts }}
|
{{- range $volumeMount := $deployment.volumeMounts }}
|
||||||
- mountPath: {{ $volumeMount.mountPath }}
|
- mountPath: {{ $volumeMount.mountPath }}
|
||||||
name: {{ $volumeMount.name }}
|
name: {{ $volumeMount.name }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
@@ -104,9 +104,9 @@ spec:
|
|||||||
whenUnsatisfiable: ScheduleAnyway
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
|
||||||
|
|
||||||
{{- if $deployment.spec.volumes }}
|
{{- if $deployment.volumes }}
|
||||||
volumes:
|
volumes:
|
||||||
{{- range $index, $volume := $deployment.spec.volumes }}
|
{{- range $index, $volume := $deployment.volumes }}
|
||||||
- name: {{ $volume.name }}
|
- name: {{ $volume.name }}
|
||||||
{{- if $volume.hostPath }}
|
{{- if $volume.hostPath }}
|
||||||
hostPath:
|
hostPath:
|
||||||
|
|||||||
@@ -14,134 +14,127 @@ deployments:
|
|||||||
- name: chatqna-backend-server-deploy
|
- name: chatqna-backend-server-deploy
|
||||||
image: opea/chatqna:latest
|
image: opea/chatqna:latest
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
ports:
|
||||||
ports:
|
- containerPort: 8888
|
||||||
- containerPort: 8888
|
|
||||||
|
|
||||||
- name: dataprep-deploy
|
- name: dataprep-deploy
|
||||||
image: opea/dataprep-redis:latest
|
image: opea/dataprep-redis:latest
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
ports:
|
||||||
ports:
|
- containerPort: 6007
|
||||||
- containerPort: 6007
|
|
||||||
|
|
||||||
- name: vector-db
|
- name: vector-db
|
||||||
image: redis/redis-stack:7.2.0-v9
|
image: redis/redis-stack:7.2.0-v9
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
ports:
|
||||||
ports:
|
- containerPort: 6379
|
||||||
- containerPort: 6379
|
- containerPort: 8001
|
||||||
- containerPort: 8001
|
|
||||||
|
|
||||||
- name: retriever-deploy
|
- name: retriever-deploy
|
||||||
image: opea/retriever-redis:latest
|
image: opea/retriever-redis:latest
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
ports:
|
||||||
ports:
|
- containerPort: 7000
|
||||||
- containerPort: 7000
|
|
||||||
|
|
||||||
- name: embedding-dependency-deploy
|
- name: embedding-dependency-deploy
|
||||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
ports:
|
||||||
ports:
|
- containerPort: 80
|
||||||
- containerPort: 80
|
args:
|
||||||
args:
|
- name: "--model-id"
|
||||||
- name: "--model-id"
|
value: $(EMBEDDING_MODEL_ID)
|
||||||
value: $(EMBEDDING_MODEL_ID)
|
- name: "--auto-truncate"
|
||||||
- name: "--auto-truncate"
|
volumeMounts:
|
||||||
volumeMounts:
|
- mountPath: /data
|
||||||
- mountPath: /data
|
name: model-volume
|
||||||
name: model-volume
|
- mountPath: /dev/shm
|
||||||
- mountPath: /dev/shm
|
name: shm
|
||||||
name: shm
|
volumes:
|
||||||
volumes:
|
- hostPath:
|
||||||
- hostPath:
|
path: /mnt/models
|
||||||
path: /mnt/models
|
type: Directory
|
||||||
type: Directory
|
name: model-volume
|
||||||
name: model-volume
|
- emptyDir:
|
||||||
- emptyDir:
|
medium: Memory
|
||||||
medium: Memory
|
sizeLimit: 1Gi
|
||||||
sizeLimit: 1Gi
|
name: shm
|
||||||
name: shm
|
|
||||||
|
|
||||||
- name: reranking-dependency-deploy
|
- name: reranking-dependency-deploy
|
||||||
image: opea/tei-gaudi:latest
|
image: opea/tei-gaudi:latest
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
args:
|
||||||
args:
|
- name: "--model-id"
|
||||||
- name: "--model-id"
|
- value: $(RERANK_MODEL_ID)
|
||||||
- value: $(RERANK_MODEL_ID)
|
- name: "--auto-truncate"
|
||||||
- name: "--auto-truncate"
|
env:
|
||||||
env:
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
value: none
|
||||||
value: none
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
value: "true"
|
||||||
value: "true"
|
- name: runtime
|
||||||
- name: runtime
|
value: habana
|
||||||
value: habana
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
- name: HABANA_VISIBLE_DEVICES
|
value: all
|
||||||
value: all
|
- name: HF_TOKEN
|
||||||
- name: HF_TOKEN
|
value: ${HF_TOKEN}
|
||||||
value: ${HF_TOKEN}
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
value: "512"
|
||||||
value: "512"
|
volumeMounts:
|
||||||
volumeMounts:
|
- mountPath: /data
|
||||||
- mountPath: /data
|
name: model-volume
|
||||||
name: model-volume
|
- mountPath: /dev/shm
|
||||||
- mountPath: /dev/shm
|
name: shm
|
||||||
name: shm
|
volumes:
|
||||||
volumes:
|
- hostPath:
|
||||||
- hostPath:
|
path: /mnt/models
|
||||||
path: /mnt/models
|
type: Directory
|
||||||
type: Directory
|
name: model-volume
|
||||||
name: model-volume
|
- emptyDir:
|
||||||
- emptyDir:
|
medium: Memory
|
||||||
medium: Memory
|
sizeLimit: 1Gi
|
||||||
sizeLimit: 1Gi
|
name: shm
|
||||||
name: shm
|
|
||||||
|
|
||||||
- name: llm-dependency-deploy
|
- name: llm-dependency-deploy
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
replicas: 1
|
replicas: 1
|
||||||
spec:
|
ports:
|
||||||
ports:
|
- containerPort: 80
|
||||||
- containerPort: 80
|
resources:
|
||||||
resources:
|
limits:
|
||||||
limits:
|
habana.ai/gaudi: 1
|
||||||
habana.ai/gaudi: 1
|
args:
|
||||||
args:
|
- name: "--model-id"
|
||||||
- name: "--model-id"
|
value: $(LLM_MODEL_ID)
|
||||||
value: $(LLM_MODEL_ID)
|
- name: "--max-input-length"
|
||||||
- name: "--max-input-length"
|
value: "2048"
|
||||||
value: "2048"
|
- name: "--max-total-tokens"
|
||||||
- name: "--max-total-tokens"
|
value: "4096"
|
||||||
value: "4096"
|
env:
|
||||||
env:
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
value: none
|
||||||
value: none
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
value: "true"
|
||||||
value: "true"
|
- name: runtime
|
||||||
- name: runtime
|
value: habana
|
||||||
value: habana
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
- name: HABANA_VISIBLE_DEVICES
|
value: all
|
||||||
value: all
|
- name: HF_TOKEN
|
||||||
- name: HF_TOKEN
|
value: ${HF_TOKEN}
|
||||||
value: ${HF_TOKEN}
|
volumeMounts:
|
||||||
volumeMounts:
|
- mountPath: /data
|
||||||
- mountPath: /data
|
name: model-volume
|
||||||
name: model-volume
|
- mountPath: /dev/shm
|
||||||
- mountPath: /dev/shm
|
name: shm
|
||||||
name: shm
|
volumes:
|
||||||
volumes:
|
- hostPath:
|
||||||
- hostPath:
|
path: /mnt/models
|
||||||
path: /mnt/models
|
type: Directory
|
||||||
type: Directory
|
name: model-volume
|
||||||
name: model-volume
|
- emptyDir:
|
||||||
- emptyDir:
|
medium: Memory
|
||||||
medium: Memory
|
sizeLimit: 1Gi
|
||||||
sizeLimit: 1Gi
|
name: shm
|
||||||
name: shm
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
- name: chatqna-backend-server-svc
|
- name: chatqna-backend-server-svc
|
||||||
|
|||||||
Reference in New Issue
Block a user