Files
GenAIExamples/FaqGen/benchmark/performance/helm_charts/values.yaml
2024-10-21 11:06:37 +03:00

103 lines
2.2 KiB
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
namespace: default
config:
  # Model served by TGI; referenced by the --model-id container arg via $(LLM_MODEL_ID).
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: faq-config
  NODE_SELECTOR: opea
  # Quoted so the shell-style placeholder stays a plain string for the YAML
  # parser, and an empty substitution yields "" rather than null.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
microservices:
  # FaqGen megaservice (gateway / orchestrator entry point).
  - name: faq-mega-server-deploy
    image: opea/chatqna:latest
    replicas: 1
    ports:
      - containerPort: 7777
  # FaqGen LLM microservice that fronts the TGI backend.
  - name: faq-micro-deploy
    image: opea/llm-faqgen-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9000
  # Text-Generation-Inference server on Habana Gaudi accelerators.
  - name: faq-tgi-deploy
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    replicas: 1
    ports:
      - containerPort: 80
    resources:
      limits:
        habana.ai/gaudi: 1
    args:
      # "$(LLM_MODEL_ID)" is expanded by Kubernetes from the container env;
      # quoting keeps it a plain string for the YAML parser.
      - name: "--model-id"
        value: "$(LLM_MODEL_ID)"
      - name: "--max-input-length"
        value: "2048"
      - name: "--max-total-tokens"
        value: "4096"
    env:
      # All env values are quoted strings where they could otherwise be
      # mis-typed (booleans / null-lookalikes) by a YAML parser.
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: "none"
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      - name: ENABLE_HPU_GRAPH
        value: "true"
      - name: LIMIT_HPU_GRAPH
        value: "true"
      - name: USE_FLASH_ATTENTION
        value: "true"
      - name: FLASH_ATTENTION_RECOMPUTE
        value: "true"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      # Host directory holding pre-downloaded model weights.
      - name: model-volume
        hostPath:
          path: /mnt/models
          type: Directory
      # RAM-backed shared memory for the inference runtime.
      - name: shm
        emptyDir:
          medium: Memory
          sizeLimit: 1Gi
services:
  # Cluster-internal service for the FaqGen LLM microservice.
  - name: faq-micro-svc
    spec:
      ports:
        - name: service
          port: 9003
          targetPort: 9000
      selector:
        app: faq-micro-deploy
      type: ClusterIP
  # Cluster-internal service for the TGI backend.
  - name: faq-tgi-svc
    spec:
      ports:
        - name: service
          port: 8010
          targetPort: 80
      selector:
        app: faq-tgi-deploy
      type: ClusterIP
  # Externally reachable (NodePort) service for the megaservice gateway.
  - name: faq-mega-server-svc
    spec:
      ports:
        - name: service
          port: 7779
          targetPort: 7777
          # nodePort must be in the cluster's NodePort range (default 30000-32767).
          nodePort: 30779
      selector:
        app: faq-mega-server-deploy
      type: NodePort