Files
GenAIExamples/FaqGen/benchmark/performance/helm_charts/values.yaml
2024-10-21 11:06:37 +03:00

103 lines
2.2 KiB
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
namespace: default
config:
  # Model served by TGI; referenced by the --model-id container arg via $(LLM_MODEL_ID).
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: faq-config
  NODE_SELECTOR: opea
  # Quoted so the shell-style placeholder stays a plain string for the YAML
  # parser, and an empty substitution yields "" rather than null.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
microservices:
  # FaqGen megaservice (gateway / orchestrator entry point).
  - name: faq-mega-server-deploy
    image: opea/chatqna:latest
    replicas: 1
    ports:
      - containerPort: 7777
  # FaqGen LLM microservice that fronts the TGI backend.
  - name: faq-micro-deploy
    image: opea/llm-faqgen-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9000
  # Text-Generation-Inference server on Habana Gaudi accelerators.
  - name: faq-tgi-deploy
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    replicas: 1
    ports:
      - containerPort: 80
    resources:
      limits:
        habana.ai/gaudi: 1
    args:
      # "$(LLM_MODEL_ID)" is expanded by Kubernetes from the container env;
      # quoting keeps it a plain string for the YAML parser.
      - name: "--model-id"
        value: "$(LLM_MODEL_ID)"
      - name: "--max-input-length"
        value: "2048"
      - name: "--max-total-tokens"
        value: "4096"
    env:
      # All env values are quoted strings where they could otherwise be
      # mis-typed (booleans / null-lookalikes) by a YAML parser.
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: "none"
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      - name: ENABLE_HPU_GRAPH
        value: "true"
      - name: LIMIT_HPU_GRAPH
        value: "true"
      - name: USE_FLASH_ATTENTION
        value: "true"
      - name: FLASH_ATTENTION_RECOMPUTE
        value: "true"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      # Host directory holding pre-downloaded model weights.
      - name: model-volume
        hostPath:
          path: /mnt/models
          type: Directory
      # RAM-backed shared memory for the inference runtime.
      - name: shm
        emptyDir:
          medium: Memory
          sizeLimit: 1Gi
services:
  # Cluster-internal service for the FaqGen LLM microservice.
  - name: faq-micro-svc
    spec:
      ports:
        - name: service
          port: 9003
          targetPort: 9000
      selector:
        app: faq-micro-deploy
      type: ClusterIP
  # Cluster-internal service for the TGI backend.
  - name: faq-tgi-svc
    spec:
      ports:
        - name: service
          port: 8010
          targetPort: 80
      selector:
        app: faq-tgi-deploy
      type: ClusterIP
  # Externally reachable (NodePort) service for the megaservice gateway.
  - name: faq-mega-server-svc
    spec:
      ports:
        - name: service
          port: 7779
          targetPort: 7777
          # nodePort must be in the cluster's NodePort range (default 30000-32767).
          nodePort: 30779
      selector:
        app: faq-mega-server-deploy
      type: NodePort