Remove the intermediate `spec` level from the ChatQnA benchmark Helm chart values and templates

2024-10-21 09:01:00 +03:00
parent a0b2263fd3
commit 24166615d7
3 changed files with 142 additions and 156 deletions
--- a/ChatQnA/benchmark/performance/helm_charts/customize.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/customize.yaml
@@ -5,53 +5,46 @@ HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 podSpecs:
  - name: chatqna-backend-server-deploy
-    spec:
+    replicas: 2
-      replicas: 2
+    resources:
-      resources:
+      limits:
-        limits:
+        cpu: "8"
-          cpu: "8"
+        memory: "8000Mi"
-          memory: "8000Mi"
+      requests:
-        requests:
+        cpu: "8"
-          cpu: "8"
+        memory: "8000Mi"
          memory: "8000Mi"
  - name: embedding-dependency-deploy
-    spec:
+    replicas: 1
-      replicas: 1
+    resources:
-      resources:
+      limits:
-        limits:
+        cpu: "80"
-          cpu: "80"
+        memory: "20000Mi"
-          memory: "20000Mi"
+      requests:
-        requests:
+        cpu: "80"
-          cpu: "80"
+        memory: "20000Mi"
          memory: "20000Mi"
  - name: reranking-dependency-deploy
-    spec:
+    replicas: 1
-      replicas: 1
+    resources:
-      resources:
+      limits:
-        limits:
+        habana.ai/gaudi: 1
          habana.ai/gaudi: 1
  - name: llm-dependency-deploy
-    spec:
+    replicas: 7
-      replicas: 7890
+    resources:
-      resources:
+      limits:
-        limits:
+        habana.ai/gaudi: 1
          habana.ai/gaudi: 1
  - name: dataprep-deploy
-    spec:
+    replicas: 1
      replicas: 1
  - name: vector-db
-    spec:
+    replicas: 1
      replicas: 1
  - name: retriever-deploy
-    spec:
+    replicas: 2
-      replicas: 2
+    resources:
-      resources:
+      requests:
-        requests:
+        cpu: "4"
-          cpu: "4"
+        memory: "4000Mi"
          memory: "4000Mi"
--- a/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -12,7 +12,7 @@ spec:
  {{- $replicas := $deployment.replicas }}
  {{- range $podSpec := $global.podSpecs }}
    {{- if eq $podSpec.name $deployment.name }}
-      {{- $replicas = $podSpec.spec.replicas | default $deployment.replicas }}
+      {{- $replicas = $podSpec.replicas | default $deployment.replicas }}
    {{- end }}
  {{- end }}
  replicas: {{ $replicas }}
@@ -31,9 +31,9 @@ spec:
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}
-        {{- if $deployment.spec.args }}
+        {{- if $deployment.args }}
        args:
-        {{- range $arg := $deployment.spec.args }}
+        {{- range $arg := $deployment.args }}
          {{- if $arg.name }}
          - {{ $arg.name }}
          {{- end }}
@@ -43,9 +43,9 @@ spec:
        {{- end }}
        {{- end }}
-        {{- if $deployment.spec.env }}
+        {{- if $deployment.env }}
        env:
-        {{- range $env := $deployment.spec.env }}
+        {{- range $env := $deployment.env }}
          - name: {{ $env.name }}
            value: "{{ $env.value }}"
        {{- end }}
@@ -55,20 +55,20 @@ spec:
        imagePullPolicy: IfNotPresent
        name: {{ $deployment.name }}
-        {{- if $deployment.spec.ports }}
+        {{- if $deployment.ports }}
        ports:
-        {{- range $port := $deployment.spec.ports }}
+        {{- range $port := $deployment.ports }}
          {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
          {{- end }}
        {{- end }}
        {{- end }}
-        {{- $resources := $deployment.spec.resources }}
+        {{- $resources := $deployment.resources }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $deployment.name }}
-            {{- if $podSpec.spec.resources }}
+            {{- if $podSpec.resources }}
-              {{- $resources = $podSpec.spec.resources }}
+              {{- $resources = $podSpec.resources }}
            {{- end }}
          {{- end }}
        {{- end }}
@@ -83,9 +83,9 @@ spec:
        {{- end }}
        {{- end }}
-        {{- if $deployment.spec.volumeMounts }}
+        {{- if $deployment.volumeMounts }}
        volumeMounts:
-        {{- range $volumeMount := $deployment.spec.volumeMounts }}
+        {{- range $volumeMount := $deployment.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath }}
            name: {{ $volumeMount.name }}
        {{- end }}
@@ -104,9 +104,9 @@ spec:
        whenUnsatisfiable: ScheduleAnyway
-      {{- if $deployment.spec.volumes }}
+      {{- if $deployment.volumes }}
      volumes:
-      {{- range $index, $volume := $deployment.spec.volumes }}
+      {{- range $index, $volume := $deployment.volumes }}
        - name: {{ $volume.name }}
          {{- if $volume.hostPath }}
          hostPath:
--- a/ChatQnA/benchmark/performance/helm_charts/values.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/values.yaml
@@ -14,134 +14,127 @@ deployments:
  - name: chatqna-backend-server-deploy
    image: opea/chatqna:latest
    replicas: 1
-    spec:
+    ports:
-      ports:
+      - containerPort: 8888
        - containerPort: 8888
  - name: dataprep-deploy
    image: opea/dataprep-redis:latest
    replicas: 1
-    spec:
+    ports:
-      ports:
+      - containerPort: 6007
        - containerPort: 6007
  - name: vector-db
    image: redis/redis-stack:7.2.0-v9
    replicas: 1
-    spec:
+    ports:
-      ports:
+      - containerPort: 6379
-        - containerPort: 6379
+      - containerPort: 8001
        - containerPort: 8001
  - name: retriever-deploy
    image: opea/retriever-redis:latest
    replicas: 1
-    spec:
+    ports:
-      ports:
+      - containerPort: 7000
        - containerPort: 7000
  - name: embedding-dependency-deploy
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    replicas: 1
-    spec:
+    ports:
-      ports:
+      - containerPort: 80
-        - containerPort: 80
+    args:
-      args:
+      - name: "--model-id"
-        - name: "--model-id"
+        value: $(EMBEDDING_MODEL_ID)
-          value: $(EMBEDDING_MODEL_ID)
+      - name: "--auto-truncate"
-        - name: "--auto-truncate"
+    volumeMounts:
-      volumeMounts:
+      - mountPath: /data
-        - mountPath: /data
+        name: model-volume
-          name: model-volume
+      - mountPath: /dev/shm
-        - mountPath: /dev/shm
+        name: shm
-          name: shm
+    volumes:
-      volumes:
+      - hostPath:
-        - hostPath:
+          path: /mnt/models
-            path: /mnt/models
+          type: Directory
-            type: Directory
+        name: model-volume
-          name: model-volume
+      - emptyDir:
-        - emptyDir:
+          medium: Memory
-            medium: Memory
+          sizeLimit: 1Gi
-            sizeLimit: 1Gi
+        name: shm
          name: shm
  - name: reranking-dependency-deploy
    image: opea/tei-gaudi:latest
    replicas: 1
-    spec:
+    args:
-      args:
+      - name: "--model-id"
-        - name: "--model-id"
+      - value: $(RERANK_MODEL_ID)
-        - value: $(RERANK_MODEL_ID)
+      - name: "--auto-truncate"
-        - name: "--auto-truncate"
+    env:
-      env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
-          value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: "true"
-          value: "true"
+      - name: runtime
-        - name: runtime
+        value: habana
-          value: habana
+      - name: HABANA_VISIBLE_DEVICES
-        - name: HABANA_VISIBLE_DEVICES
+        value: all
-          value: all
+      - name: HF_TOKEN
-        - name: HF_TOKEN
+        value: ${HF_TOKEN}
-          value: ${HF_TOKEN}
+      - name: MAX_WARMUP_SEQUENCE_LENGTH
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
+        value: "512"
-          value: "512"
+    volumeMounts:
-      volumeMounts:
+      - mountPath: /data
-        - mountPath: /data
+        name: model-volume
-          name: model-volume
+      - mountPath: /dev/shm
-        - mountPath: /dev/shm
+        name: shm
-          name: shm
+    volumes:
-      volumes:
+      - hostPath:
-        - hostPath:
+          path: /mnt/models
-            path: /mnt/models
+          type: Directory
-            type: Directory
+        name: model-volume
-          name: model-volume
+      - emptyDir:
-        - emptyDir:
+          medium: Memory
-            medium: Memory
+          sizeLimit: 1Gi
-            sizeLimit: 1Gi
+        name: shm
          name: shm
  - name: llm-dependency-deploy
    image: ghcr.io/huggingface/tgi-gaudi:2.0.4
    replicas: 1
-    spec:
+    ports:
-      ports:
+      - containerPort: 80
-        - containerPort: 80
+    resources:
-      resources:
+      limits:
-        limits:
+        habana.ai/gaudi: 1
-          habana.ai/gaudi: 1
+    args:
-      args:
+      - name: "--model-id"
-        - name: "--model-id"
+        value: $(LLM_MODEL_ID)
-          value: $(LLM_MODEL_ID)
+      - name: "--max-input-length"
-        - name: "--max-input-length"
+        value: "2048"
-          value: "2048"
+      - name: "--max-total-tokens"
-        - name: "--max-total-tokens"
+        value: "4096"
-          value: "4096"
+    env:
-      env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
-          value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: "true"
-          value: "true"
+      - name: runtime
-        - name: runtime
+        value: habana
-          value: habana
+      - name: HABANA_VISIBLE_DEVICES
-        - name: HABANA_VISIBLE_DEVICES
+        value: all
-          value: all
+      - name: HF_TOKEN
-        - name: HF_TOKEN
+        value: ${HF_TOKEN}
-          value: ${HF_TOKEN}
+    volumeMounts:
-      volumeMounts:
+      - mountPath: /data
-        - mountPath: /data
+        name: model-volume
-          name: model-volume
+      - mountPath: /dev/shm
-        - mountPath: /dev/shm
+        name: shm
-          name: shm
+    volumes:
-      volumes:
+      - hostPath:
-        - hostPath:
+          path: /mnt/models
-            path: /mnt/models
+          type: Directory
-            type: Directory
+        name: model-volume
-          name: model-volume
+      - emptyDir:
-        - emptyDir:
+          medium: Memory
-            medium: Memory
+          sizeLimit: 1Gi
-            sizeLimit: 1Gi
+        name: shm
          name: shm
 services:
  - name: chatqna-backend-server-svc