GenAIExamples/FaqGen/kubernetes/manifests/xeon/faqgen.yaml

---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment of the text-generation-inference (TGI) model server used by the
# FaqGen pipeline. It runs a single replica, listens on container port 80 and
# is reached through the faq-tgi-cpu-svc Service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faq-tgi-cpu-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faq-tgi-cpu-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: faq-tgi-cpu-deploy
    spec:
      # NOTE(review): hostIPC shares the node's IPC namespace with this pod.
      # /dev/shm is already backed by the in-memory emptyDir below, so this
      # may be unnecessary -- confirm before tightening.
      hostIPC: true
      securityContext: {}
      containers:
        - name: faq-tgi-cpu-deploy-demo
          env:
            # Placeholder value: replace with a real Hugging Face token
            # (preferably sourced from a Secret) before deploying.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "insert-your-huggingface-token-here"
            # Quoted so the env value stays a string; matches containerPort.
            - name: PORT
              value: "80"
          image: ghcr.io/huggingface/text-generation-inference:1.4
          imagePullPolicy: IfNotPresent
          securityContext: {}
          args:
            - --model-id
            - 'meta-llama/Meta-Llama-3-8B-Instruct'
            # The TGI launcher documents this flag in kebab-case; the
            # underscore form (--cuda_graphs) is not a documented option.
            # Per the TGI docs, "0" disables CUDA graphs.
            - --cuda-graphs
            - '0'
            - --max-input-length
            - '3096'
            - --max-total-tokens
            - '4096'
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
      serviceAccountName: default
      volumes:
        # Node-local directory mounted at /data; with type Directory the path
        # must already exist on the node or the pod will not start.
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        # RAM-backed scratch volume mounted at /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
# ClusterIP Service in front of the faq-tgi-cpu-deploy pods.
# Cluster port 8011 forwards to container port 80.
kind: Service
apiVersion: v1
metadata:
  name: faq-tgi-cpu-svc
  # Pinned to the namespace of the faq-tgi-cpu-deploy Deployment. Without
  # it, a non-default kubectl context would create the Service elsewhere,
  # where its selector matches no pods and the
  # faq-tgi-cpu-svc.default.svc.cluster.local URL used by the client
  # would not resolve.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: faq-tgi-cpu-deploy
  ports:
    - name: service
      port: 8011
      targetPort: 80
---
# Deployment of the FaqGen LLM microservice (opea/llm-faqgen-tgi). It is
# configured with the TGI endpoint URL and listens on container port 9000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faq-micro-cpu-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faq-micro-cpu-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: faq-micro-cpu-deploy
    spec:
      # hostIPC is intentionally not set: this pod mounts no shared memory
      # and is only given an HTTP endpoint, so sharing the node's IPC
      # namespace would violate the Baseline Pod Security Standard for no
      # visible benefit.
      containers:
        - name: faq-micro-cpu-deploy
          env:
            # Cluster-internal URL of the TGI Service (Service port 8011).
            - name: TGI_LLM_ENDPOINT
              value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
            # Placeholder value: replace with a real Hugging Face token
            # (preferably sourced from a Secret) before deploying.
            - name: HUGGINGFACEHUB_API_TOKEN
              value: "insert-your-huggingface-token-here"
          image: opea/llm-faqgen-tgi:v0.9
          imagePullPolicy: IfNotPresent
          # Explicit null: no argument override is supplied to the image.
          args: null
          ports:
            - containerPort: 9000
      serviceAccountName: default
---
# ClusterIP Service in front of the faq-micro-cpu-deploy pods.
# Cluster port 9004 forwards to container port 9000.
kind: Service
apiVersion: v1
metadata:
  name: faq-micro-cpu-svc
  # Pinned to the namespace of the faq-micro-cpu-deploy Deployment. Without
  # it, a non-default kubectl context would create the Service elsewhere,
  # where its selector matches no pods and the short name
  # "faq-micro-cpu-svc" used by the mega server would not resolve.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: faq-micro-cpu-deploy
  ports:
    - name: service
      port: 9004
      targetPort: 9000
---
# Deployment of the FaqGen mega service (opea/faqgen). It is pointed at the
# LLM microservice through its Service name and port, and listens on
# container port 7777.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faq-mega-server-cpu-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faq-mega-server-cpu-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: faq-mega-server-cpu-deploy
    spec:
      # hostIPC is intentionally not set: this pod mounts no shared memory
      # and is only given service host names and ports, so sharing the
      # node's IPC namespace would violate the Baseline Pod Security
      # Standard for no visible benefit.
      containers:
        - name: faq-mega-server-cpu-deploy
          env:
            # Service name and Service port of the LLM microservice.
            - name: LLM_SERVICE_HOST_IP
              value: faq-micro-cpu-svc
            - name: LLM_SERVICE_PORT
              value: "9004"
            # This workload's own Service name and its container port.
            - name: MEGA_SERVICE_HOST_IP
              value: faq-mega-server-cpu-svc
            - name: MEGA_SERVICE_PORT
              value: "7777"
          image: opea/faqgen:v0.9
          imagePullPolicy: IfNotPresent
          # Explicit null: no argument override is supplied to the image.
          args: null
          ports:
            - containerPort: 7777
      serviceAccountName: default
---
# NodePort Service exposing the faq-mega-server-cpu-deploy pods outside the
# cluster. Node port 30778 and cluster port 7778 both forward to container
# port 7777.
kind: Service
apiVersion: v1
metadata:
  name: faq-mega-server-cpu-svc
  # Pinned to the namespace of the faq-mega-server-cpu-deploy Deployment.
  # Without it, a non-default kubectl context would create the Service
  # elsewhere, where its selector matches no pods.
  namespace: default
spec:
  type: NodePort
  selector:
    app: faq-mega-server-cpu-deploy
  ports:
    - name: service
      port: 7778
      targetPort: 7777
      nodePort: 30778