diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml
index 5e156db45..01388e66a 100644
--- a/FaqGen/benchmark/performance/helm_charts/customize.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -1,34 +1,23 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
 podSpecs:
   - name: faq-mega-server-deploy
-    spec:
-      image_name: opea/chatqna
-      image_tag: latest
-      replicas: 2
-      resources:
-        limits:
-          cpu: "8"
-          memory: "8000Mi"
-        requests:
-          cpu: "8"
-          memory: "8000Mi"
+    replicas: 2
+    resources:
+      limits:
+        cpu: "8"
+        memory: "8000Mi"
+      requests:
+        cpu: "8"
+        memory: "8000Mi"
 
   - name: faq-tgi-deploy
-    spec:
-      image_name: ghcr.io/huggingface/tgi-gaudi
-      image_tag: 2.0.5
-      replicas: 7
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 7
+    resources:
+      limits:
+        habana.ai/gaudi: 1
 
   - name: faq-micro-deploy
-    spec:
-      image_name: opea/llm-faqgen-tgi
-      image_tag: latest
-      replicas: 1
+    replicas: 1
diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index eeb206761..6f12074b8 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -7,63 +7,67 @@ config:
   LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
   CONFIG_MAP_NAME: faq-config
   NODE_SELECTOR: faq-opea
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 
-deployments:
+microservices:
   - name: faq-mega-server-deploy
-    spec:
-      ports:
-        - containerPort: 7777
+    image: opea/chatqna:latest
+    replicas: 1
+    ports:
+      - containerPort: 7777
 
   - name: faq-micro-deploy
-    spec:
-      ports:
-        - containerPort: 9000
+    image: opea/llm-faqgen-tgi:latest
+    replicas: 1
+    ports:
+      - containerPort: 9000
 
   - name: faq-tgi-deploy
-    spec:
-      ports:
-        - containerPort: 80
-      resources:
-        limits:
-          habana.ai/gaudi: 1
-      args:
-        - name: "--model-id"
-          value: $(LLM_MODEL_ID)
-        - name: "--max-input-length"
-          value: "2048"
-        - name: "--max-total-tokens"
-          value: "4096"
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: "true"
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: ENABLE_HPU_GRAPH
-          value: 'true'
-        - name: LIMIT_HPU_GRAPH
-          value: 'true'
-        - name: USE_FLASH_ATTENTION
-          value: 'true'
-        - name: FLASH_ATTENTION_RECOMPUTE
-          value: 'true'
-      volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-      volumes:
-        - hostPath:
-            path: /mnt/models
-            type: Directory
-          name: model-volume
-        - emptyDir:
-            medium: Memory
-            sizeLimit: 1Gi
-          name: shm
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    replicas: 1
+    ports:
+      - containerPort: 80
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+    args:
+      - name: "--model-id"
+        value: $(LLM_MODEL_ID)
+      - name: "--max-input-length"
+        value: "2048"
+      - name: "--max-total-tokens"
+        value: "4096"
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: "true"
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+      - name: ENABLE_HPU_GRAPH
+        value: 'true'
+      - name: LIMIT_HPU_GRAPH
+        value: 'true'
+      - name: USE_FLASH_ATTENTION
+        value: 'true'
+      - name: FLASH_ATTENTION_RECOMPUTE
+        value: 'true'
+    volumeMounts:
+      - mountPath: /data
+        name: model-volume
+      - mountPath: /dev/shm
+        name: shm
+    volumes:
+      - hostPath:
+          path: /mnt/models
+          type: Directory
+        name: model-volume
+      - emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+        name: shm
 
 services:
   - name: faq-micro-svc