refactored FaqGen
This commit is contained in:
@@ -1,34 +1,23 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
|
||||||
|
|
||||||
podSpecs:
|
podSpecs:
|
||||||
- name: faq-mega-server-deploy
|
- name: faq-mega-server-deploy
|
||||||
spec:
|
replicas: 2
|
||||||
image_name: opea/chatqna
|
resources:
|
||||||
image_tag: latest
|
limits:
|
||||||
replicas: 2
|
cpu: "8"
|
||||||
resources:
|
memory: "8000Mi"
|
||||||
limits:
|
requests:
|
||||||
cpu: "8"
|
cpu: "8"
|
||||||
memory: "8000Mi"
|
memory: "8000Mi"
|
||||||
requests:
|
|
||||||
cpu: "8"
|
|
||||||
memory: "8000Mi"
|
|
||||||
|
|
||||||
|
|
||||||
- name: faq-tgi-deploy
|
- name: faq-tgi-deploy
|
||||||
spec:
|
replicas: 7
|
||||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
resources:
|
||||||
image_tag: 2.0.5
|
limits:
|
||||||
replicas: 7
|
habana.ai/gaudi: 1
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
habana.ai/gaudi: 1
|
|
||||||
|
|
||||||
- name: faq-micro-deploy
|
- name: faq-micro-deploy
|
||||||
spec:
|
replicas: 1
|
||||||
image_name: opea/llm-faqgen-tgi
|
|
||||||
image_tag: latest
|
|
||||||
replicas: 1
|
|
||||||
|
|||||||
@@ -7,63 +7,67 @@ config:
|
|||||||
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
|
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
CONFIG_MAP_NAME: faq-config
|
CONFIG_MAP_NAME: faq-config
|
||||||
NODE_SELECTOR: faq-opea
|
NODE_SELECTOR: faq-opea
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
|
||||||
deployments:
|
microservices:
|
||||||
- name: faq-mega-server-deploy
|
- name: faq-mega-server-deploy
|
||||||
spec:
|
image: opea/chatqna:latest
|
||||||
ports:
|
replicas: 1
|
||||||
- containerPort: 7777
|
ports:
|
||||||
|
- containerPort: 7777
|
||||||
|
|
||||||
- name: faq-micro-deploy
|
- name: faq-micro-deploy
|
||||||
spec:
|
image: opea/llm-faqgen-tgi:latest
|
||||||
ports:
|
replicas: 1
|
||||||
- containerPort: 9000
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
|
||||||
- name: faq-tgi-deploy
|
- name: faq-tgi-deploy
|
||||||
spec:
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
ports:
|
replicas: 1
|
||||||
- containerPort: 80
|
ports:
|
||||||
resources:
|
- containerPort: 80
|
||||||
limits:
|
resources:
|
||||||
habana.ai/gaudi: 1
|
limits:
|
||||||
args:
|
habana.ai/gaudi: 1
|
||||||
- name: "--model-id"
|
args:
|
||||||
value: $(LLM_MODEL_ID)
|
- name: "--model-id"
|
||||||
- name: "--max-input-length"
|
value: $(LLM_MODEL_ID)
|
||||||
value: "2048"
|
- name: "--max-input-length"
|
||||||
- name: "--max-total-tokens"
|
value: "2048"
|
||||||
value: "4096"
|
- name: "--max-total-tokens"
|
||||||
env:
|
value: "4096"
|
||||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
env:
|
||||||
value: none
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
value: none
|
||||||
value: "true"
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
- name: runtime
|
value: "true"
|
||||||
value: habana
|
- name: runtime
|
||||||
- name: HABANA_VISIBLE_DEVICES
|
value: habana
|
||||||
value: all
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
- name: ENABLE_HPU_GRAPH
|
value: all
|
||||||
value: 'true'
|
- name: ENABLE_HPU_GRAPH
|
||||||
- name: LIMIT_HPU_GRAPH
|
value: 'true'
|
||||||
value: 'true'
|
- name: LIMIT_HPU_GRAPH
|
||||||
- name: USE_FLASH_ATTENTION
|
value: 'true'
|
||||||
value: 'true'
|
- name: USE_FLASH_ATTENTION
|
||||||
- name: FLASH_ATTENTION_RECOMPUTE
|
value: 'true'
|
||||||
value: 'true'
|
- name: FLASH_ATTENTION_RECOMPUTE
|
||||||
volumeMounts:
|
value: 'true'
|
||||||
- mountPath: /data
|
volumeMounts:
|
||||||
name: model-volume
|
- mountPath: /data
|
||||||
- mountPath: /dev/shm
|
name: model-volume
|
||||||
name: shm
|
- mountPath: /dev/shm
|
||||||
volumes:
|
name: shm
|
||||||
- hostPath:
|
volumes:
|
||||||
path: /mnt/models
|
- hostPath:
|
||||||
type: Directory
|
path: /mnt/models
|
||||||
name: model-volume
|
type: Directory
|
||||||
- emptyDir:
|
name: model-volume
|
||||||
medium: Memory
|
- emptyDir:
|
||||||
sizeLimit: 1Gi
|
medium: Memory
|
||||||
name: shm
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
services:
|
services:
|
||||||
- name: faq-micro-svc
|
- name: faq-micro-svc
|
||||||
|
|||||||
Reference in New Issue
Block a user