refactored FaqGen

This commit is contained in:
Zhenzhong Xu
2024-10-21 10:46:12 +03:00
parent 58ff7d9518
commit 4e1237d410
2 changed files with 68 additions and 75 deletions

View File

@@ -1,34 +1,23 @@
# Copyright (C) 2024 Intel Corporation # Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
podSpecs: podSpecs:
- name: faq-mega-server-deploy - name: faq-mega-server-deploy
spec: replicas: 2
image_name: opea/chatqna resources:
image_tag: latest limits:
replicas: 2 cpu: "8"
resources: memory: "8000Mi"
limits: requests:
cpu: "8" cpu: "8"
memory: "8000Mi" memory: "8000Mi"
requests:
cpu: "8"
memory: "8000Mi"
- name: faq-tgi-deploy - name: faq-tgi-deploy
spec: replicas: 7
image_name: ghcr.io/huggingface/tgi-gaudi resources:
image_tag: 2.0.5 limits:
replicas: 7 habana.ai/gaudi: 1
resources:
limits:
habana.ai/gaudi: 1
- name: faq-micro-deploy - name: faq-micro-deploy
spec: replicas: 1
image_name: opea/llm-faqgen-tgi
image_tag: latest
replicas: 1

View File

@@ -7,63 +7,67 @@ config:
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
CONFIG_MAP_NAME: faq-config CONFIG_MAP_NAME: faq-config
NODE_SELECTOR: faq-opea NODE_SELECTOR: faq-opea
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
deployments: microservices:
- name: faq-mega-server-deploy - name: faq-mega-server-deploy
spec: image: opea/chatqna:latest
ports: replicas: 1
- containerPort: 7777 ports:
- containerPort: 7777
- name: faq-micro-deploy - name: faq-micro-deploy
spec: image: opea/llm-faqgen-tgi:latest
ports: replicas: 1
- containerPort: 9000 ports:
- containerPort: 9000
- name: faq-tgi-deploy - name: faq-tgi-deploy
spec: image: ghcr.io/huggingface/tgi-gaudi:2.0.5
ports: replicas: 1
- containerPort: 80 ports:
resources: - containerPort: 80
limits: resources:
habana.ai/gaudi: 1 limits:
args: habana.ai/gaudi: 1
- name: "--model-id" args:
value: $(LLM_MODEL_ID) - name: "--model-id"
- name: "--max-input-length" value: $(LLM_MODEL_ID)
value: "2048" - name: "--max-input-length"
- name: "--max-total-tokens" value: "2048"
value: "4096" - name: "--max-total-tokens"
env: value: "4096"
- name: OMPI_MCA_btl_vader_single_copy_mechanism env:
value: none - name: OMPI_MCA_btl_vader_single_copy_mechanism
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES value: none
value: "true" - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
- name: runtime value: "true"
value: habana - name: runtime
- name: HABANA_VISIBLE_DEVICES value: habana
value: all - name: HABANA_VISIBLE_DEVICES
- name: ENABLE_HPU_GRAPH value: all
value: 'true' - name: ENABLE_HPU_GRAPH
- name: LIMIT_HPU_GRAPH value: 'true'
value: 'true' - name: LIMIT_HPU_GRAPH
- name: USE_FLASH_ATTENTION value: 'true'
value: 'true' - name: USE_FLASH_ATTENTION
- name: FLASH_ATTENTION_RECOMPUTE value: 'true'
value: 'true' - name: FLASH_ATTENTION_RECOMPUTE
volumeMounts: value: 'true'
- mountPath: /data volumeMounts:
name: model-volume - mountPath: /data
- mountPath: /dev/shm name: model-volume
name: shm - mountPath: /dev/shm
volumes: name: shm
- hostPath: volumes:
path: /mnt/models - hostPath:
type: Directory path: /mnt/models
name: model-volume type: Directory
- emptyDir: name: model-volume
medium: Memory - emptyDir:
sizeLimit: 1Gi medium: Memory
name: shm sizeLimit: 1Gi
name: shm
services: services:
- name: faq-micro-svc - name: faq-micro-svc