Use official tei gaudi image and update tgi gaudi version (#810)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
lvliang-intel
2024-09-23 17:52:56 +08:00
committed by GitHub
parent c35fe0b429
commit 3fb60608b3
72 changed files with 8024 additions and 154 deletions

@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
 For Gaudi:
-- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
 - whisper-gaudi: opea/whisper-gaudi:latest
 - speecht5-gaudi: opea/speecht5-gaudi:latest
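
A Docker-based Gaudi deployment would typically wire the updated image into its compose file along these lines. This is only a sketch: the service name, port mapping, model id, and launcher flags are illustrative placeholders, not values taken from this commit.

```yaml
# Illustrative compose service for the Gaudi TGI backend (placeholders throughout).
services:
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    ports:
      - "3006:80"                       # host:container port (placeholder)
    volumes:
      - ./data:/data                    # local cache for downloaded model weights
    environment:
      HF_TOKEN: ${HF_TOKEN}             # token for gated models, if required
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id Intel/neural-chat-7b-v3-3 --max-input-tokens 1024 --max-total-tokens 2048
```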

@@ -271,7 +271,7 @@ spec:
       - envFrom:
           - configMapRef:
               name: audio-qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
@@ -303,6 +303,14 @@ spec:
             value: none
           - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
             value: 'true'
+          - name: ENABLE_HPU_GRAPH
+            value: 'true'
+          - name: LIMIT_HPU_GRAPH
+            value: 'true'
+          - name: USE_FLASH_ATTENTION
+            value: 'true'
+          - name: FLASH_ATTENTION_RECOMPUTE
+            value: 'true'
           - name: runtime
             value: habana
           - name: HABANA_VISIBLE_DEVICES
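
The variables added above are tgi-gaudi's documented HPU tuning switches (HPU graph capture and Habana flash attention). For reference, the same settings can be carried over to a compose-based Gaudi deployment; the sketch below only mirrors the manifest values, and the service name is a placeholder rather than something taken from this commit.

```yaml
# Equivalent environment block for a compose deployment (illustrative service name).
services:
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    runtime: habana
    environment:
      PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"   # required for tensor-parallel (sharded) runs on HPU
      ENABLE_HPU_GRAPH: "true"                 # capture HPU graphs to reduce host-side launch overhead
      LIMIT_HPU_GRAPH: "true"                  # limit graph usage to save device memory
      USE_FLASH_ATTENTION: "true"              # use Habana's fused flash-attention kernel
      FLASH_ATTENTION_RECOMPUTE: "true"        # recompute variant of flash attention to lower memory use
```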
@@ -315,7 +323,7 @@ spec:
       volumes:
         - name: model-volume
           hostPath:
-            path: /home/sdp/cesg
+            path: /mnt/models
             type: Directory
         - name: shm
           emptyDir:
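
The model cache hostPath moves from a user-specific directory to the shared /mnt/models location. On the container side this volume is normally mounted at the TGI data directory; the volumeMounts below are shown only for illustration and are an assumption, not part of this diff.

```yaml
# Illustrative container-side mounts that pair with the volumes above;
# the mountPath values are assumptions, not taken from this commit.
containers:
  - name: llm-dependency-deploy-demo
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    volumeMounts:
      - name: model-volume
        mountPath: /data        # where TGI caches downloaded model weights by default
      - name: shm
        mountPath: /dev/shm     # shared-memory backing for the inference runtime
```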