Remove the `spec` nesting level from podSpecs and deployments entries

This commit is contained in:
Zhenzhong Xu
2024-10-21 09:01:00 +03:00
parent a0b2263fd3
commit 24166615d7
3 changed files with 142 additions and 156 deletions

View File

@@ -5,53 +5,46 @@ HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
podSpecs: podSpecs:
- name: chatqna-backend-server-deploy - name: chatqna-backend-server-deploy
spec: replicas: 2
replicas: 2 resources:
resources: limits:
limits: cpu: "8"
cpu: "8" memory: "8000Mi"
memory: "8000Mi" requests:
requests: cpu: "8"
cpu: "8" memory: "8000Mi"
memory: "8000Mi"
- name: embedding-dependency-deploy - name: embedding-dependency-deploy
spec: replicas: 1
replicas: 1 resources:
resources: limits:
limits: cpu: "80"
cpu: "80" memory: "20000Mi"
memory: "20000Mi" requests:
requests: cpu: "80"
cpu: "80" memory: "20000Mi"
memory: "20000Mi"
- name: reranking-dependency-deploy - name: reranking-dependency-deploy
spec: replicas: 1
replicas: 1 resources:
resources: limits:
limits: habana.ai/gaudi: 1
habana.ai/gaudi: 1
- name: llm-dependency-deploy - name: llm-dependency-deploy
spec: replicas: 7
replicas: 7890 resources:
resources: limits:
limits: habana.ai/gaudi: 1
habana.ai/gaudi: 1
- name: dataprep-deploy - name: dataprep-deploy
spec: replicas: 1
replicas: 1
- name: vector-db - name: vector-db
spec: replicas: 1
replicas: 1
- name: retriever-deploy - name: retriever-deploy
spec: replicas: 2
replicas: 2 resources:
resources: requests:
requests: cpu: "4"
cpu: "4" memory: "4000Mi"
memory: "4000Mi"

View File

@@ -12,7 +12,7 @@ spec:
{{- $replicas := $deployment.replicas }} {{- $replicas := $deployment.replicas }}
{{- range $podSpec := $global.podSpecs }} {{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }} {{- if eq $podSpec.name $deployment.name }}
{{- $replicas = $podSpec.spec.replicas | default $deployment.replicas }} {{- $replicas = $podSpec.replicas | default $deployment.replicas }}
{{- end }} {{- end }}
{{- end }} {{- end }}
replicas: {{ $replicas }} replicas: {{ $replicas }}
@@ -31,9 +31,9 @@ spec:
- envFrom: - envFrom:
- configMapRef: - configMapRef:
name: {{ $global.config.CONFIG_MAP_NAME }} name: {{ $global.config.CONFIG_MAP_NAME }}
{{- if $deployment.spec.args }} {{- if $deployment.args }}
args: args:
{{- range $arg := $deployment.spec.args }} {{- range $arg := $deployment.args }}
{{- if $arg.name }} {{- if $arg.name }}
- {{ $arg.name }} - {{ $arg.name }}
{{- end }} {{- end }}
@@ -43,9 +43,9 @@ spec:
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if $deployment.spec.env }} {{- if $deployment.env }}
env: env:
{{- range $env := $deployment.spec.env }} {{- range $env := $deployment.env }}
- name: {{ $env.name }} - name: {{ $env.name }}
value: "{{ $env.value }}" value: "{{ $env.value }}"
{{- end }} {{- end }}
@@ -55,20 +55,20 @@ spec:
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
name: {{ $deployment.name }} name: {{ $deployment.name }}
{{- if $deployment.spec.ports }} {{- if $deployment.ports }}
ports: ports:
{{- range $port := $deployment.spec.ports }} {{- range $port := $deployment.ports }}
{{- range $port_name, $port_id := $port }} {{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }} - {{ $port_name }}: {{ $port_id }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- $resources := $deployment.spec.resources }} {{- $resources := $deployment.resources }}
{{- range $podSpec := $global.podSpecs }} {{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }} {{- if eq $podSpec.name $deployment.name }}
{{- if $podSpec.spec.resources }} {{- if $podSpec.resources }}
{{- $resources = $podSpec.spec.resources }} {{- $resources = $podSpec.resources }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- end }} {{- end }}
@@ -83,9 +83,9 @@ spec:
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if $deployment.spec.volumeMounts }} {{- if $deployment.volumeMounts }}
volumeMounts: volumeMounts:
{{- range $volumeMount := $deployment.spec.volumeMounts }} {{- range $volumeMount := $deployment.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }} - mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }} name: {{ $volumeMount.name }}
{{- end }} {{- end }}
@@ -104,9 +104,9 @@ spec:
whenUnsatisfiable: ScheduleAnyway whenUnsatisfiable: ScheduleAnyway
{{- if $deployment.spec.volumes }} {{- if $deployment.volumes }}
volumes: volumes:
{{- range $index, $volume := $deployment.spec.volumes }} {{- range $index, $volume := $deployment.volumes }}
- name: {{ $volume.name }} - name: {{ $volume.name }}
{{- if $volume.hostPath }} {{- if $volume.hostPath }}
hostPath: hostPath:

View File

@@ -14,134 +14,127 @@ deployments:
- name: chatqna-backend-server-deploy - name: chatqna-backend-server-deploy
image: opea/chatqna:latest image: opea/chatqna:latest
replicas: 1 replicas: 1
spec: ports:
ports: - containerPort: 8888
- containerPort: 8888
- name: dataprep-deploy - name: dataprep-deploy
image: opea/dataprep-redis:latest image: opea/dataprep-redis:latest
replicas: 1 replicas: 1
spec: ports:
ports: - containerPort: 6007
- containerPort: 6007
- name: vector-db - name: vector-db
image: redis/redis-stack:7.2.0-v9 image: redis/redis-stack:7.2.0-v9
replicas: 1 replicas: 1
spec: ports:
ports: - containerPort: 6379
- containerPort: 6379 - containerPort: 8001
- containerPort: 8001
- name: retriever-deploy - name: retriever-deploy
image: opea/retriever-redis:latest image: opea/retriever-redis:latest
replicas: 1 replicas: 1
spec: ports:
ports: - containerPort: 7000
- containerPort: 7000
- name: embedding-dependency-deploy - name: embedding-dependency-deploy
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
replicas: 1 replicas: 1
spec: ports:
ports: - containerPort: 80
- containerPort: 80 args:
args: - name: "--model-id"
- name: "--model-id" value: $(EMBEDDING_MODEL_ID)
value: $(EMBEDDING_MODEL_ID) - name: "--auto-truncate"
- name: "--auto-truncate" volumeMounts:
volumeMounts: - mountPath: /data
- mountPath: /data name: model-volume
name: model-volume - mountPath: /dev/shm
- mountPath: /dev/shm name: shm
name: shm volumes:
volumes: - hostPath:
- hostPath: path: /mnt/models
path: /mnt/models type: Directory
type: Directory name: model-volume
name: model-volume - emptyDir:
- emptyDir: medium: Memory
medium: Memory sizeLimit: 1Gi
sizeLimit: 1Gi name: shm
name: shm
- name: reranking-dependency-deploy - name: reranking-dependency-deploy
image: opea/tei-gaudi:latest image: opea/tei-gaudi:latest
replicas: 1 replicas: 1
spec: args:
args: - name: "--model-id"
- name: "--model-id" - value: $(RERANK_MODEL_ID)
- value: $(RERANK_MODEL_ID) - name: "--auto-truncate"
- name: "--auto-truncate" env:
env: - name: OMPI_MCA_btl_vader_single_copy_mechanism
- name: OMPI_MCA_btl_vader_single_copy_mechanism value: none
value: none - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES value: "true"
value: "true" - name: runtime
- name: runtime value: habana
value: habana - name: HABANA_VISIBLE_DEVICES
- name: HABANA_VISIBLE_DEVICES value: all
value: all - name: HF_TOKEN
- name: HF_TOKEN value: ${HF_TOKEN}
value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH
- name: MAX_WARMUP_SEQUENCE_LENGTH value: "512"
value: "512" volumeMounts:
volumeMounts: - mountPath: /data
- mountPath: /data name: model-volume
name: model-volume - mountPath: /dev/shm
- mountPath: /dev/shm name: shm
name: shm volumes:
volumes: - hostPath:
- hostPath: path: /mnt/models
path: /mnt/models type: Directory
type: Directory name: model-volume
name: model-volume - emptyDir:
- emptyDir: medium: Memory
medium: Memory sizeLimit: 1Gi
sizeLimit: 1Gi name: shm
name: shm
- name: llm-dependency-deploy - name: llm-dependency-deploy
image: ghcr.io/huggingface/tgi-gaudi:2.0.4 image: ghcr.io/huggingface/tgi-gaudi:2.0.4
replicas: 1 replicas: 1
spec: ports:
ports: - containerPort: 80
- containerPort: 80 resources:
resources: limits:
limits: habana.ai/gaudi: 1
habana.ai/gaudi: 1 args:
args: - name: "--model-id"
- name: "--model-id" value: $(LLM_MODEL_ID)
value: $(LLM_MODEL_ID) - name: "--max-input-length"
- name: "--max-input-length" value: "2048"
value: "2048" - name: "--max-total-tokens"
- name: "--max-total-tokens" value: "4096"
value: "4096" env:
env: - name: OMPI_MCA_btl_vader_single_copy_mechanism
- name: OMPI_MCA_btl_vader_single_copy_mechanism value: none
value: none - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES value: "true"
value: "true" - name: runtime
- name: runtime value: habana
value: habana - name: HABANA_VISIBLE_DEVICES
- name: HABANA_VISIBLE_DEVICES value: all
value: all - name: HF_TOKEN
- name: HF_TOKEN value: ${HF_TOKEN}
value: ${HF_TOKEN} volumeMounts:
volumeMounts: - mountPath: /data
- mountPath: /data name: model-volume
name: model-volume - mountPath: /dev/shm
- mountPath: /dev/shm name: shm
name: shm volumes:
volumes: - hostPath:
- hostPath: path: /mnt/models
path: /mnt/models type: Directory
type: Directory name: model-volume
name: model-volume - emptyDir:
- emptyDir: medium: Memory
medium: Memory sizeLimit: 1Gi
sizeLimit: 1Gi name: shm
name: shm
services: services:
- name: chatqna-backend-server-svc - name: chatqna-backend-server-svc