refactored FaqGen

This commit is contained in:
Zhenzhong Xu
2024-10-21 10:46:12 +03:00
parent 58ff7d9518
commit 4e1237d410
2 changed files with 68 additions and 75 deletions

View File

@@ -1,13 +1,8 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
podSpecs:
- name: faq-mega-server-deploy
spec:
image_name: opea/chatqna
image_tag: latest
replicas: 2
resources:
limits:
@@ -19,16 +14,10 @@ podSpecs:
- name: faq-tgi-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.5
replicas: 7
resources:
limits:
habana.ai/gaudi: 1
- name: faq-micro-deploy
spec:
image_name: opea/llm-faqgen-tgi
image_tag: latest
replicas: 1

View File

@@ -7,20 +7,24 @@ config:
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
CONFIG_MAP_NAME: faq-config
NODE_SELECTOR: faq-opea
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
deployments:
microservices:
- name: faq-mega-server-deploy
spec:
image: opea/chatqna:latest
replicas: 1
ports:
- containerPort: 7777
- name: faq-micro-deploy
spec:
image: opea/llm-faqgen-tgi:latest
replicas: 1
ports:
- containerPort: 9000
- name: faq-tgi-deploy
spec:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
replicas: 1
ports:
- containerPort: 80
resources: