Refactor AudioQnA/MultiModalQnA/AvatarChatbot (#1310)

Signed-off-by: chensuyue <suyue.chen@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chensuyue <suyue.chen@intel.com>
2024-12-31 12:47:30 +08:00
parent 250ffb8b66
commit cc1d97f816
43 changed files with 482 additions and 1102 deletions
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -14,24 +14,17 @@ cd GenAIComps
 ### 2. Build ASR Image

 ```bash
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
-
-
-docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
 ```

 ### 3. Build LLM Image

-```bash
-docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
-```
+Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)

 ### 4. Build TTS Image

 ```bash
-docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
-
-docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
+docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
 ```

 ### 5. Build Animation Image
@@ -55,13 +48,10 @@ docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$ht
 Then run the command `docker images`, you will have following images ready:

 1. `opea/whisper:latest`
-2. `opea/asr:latest`
-3. `opea/llm-tgi:latest`
-4. `opea/speecht5:latest`
-5. `opea/tts:latest`
-6. `opea/wav2lip:latest`
-7. `opea/animation:latest`
-8. `opea/avatarchatbot:latest`
+2. `opea/speecht5:latest`
+3. `opea/wav2lip:latest`
+4. `opea/animation:latest`
+5. `opea/avatarchatbot:latest`

 ## 🚀 Set the environment variables

@@ -71,24 +61,21 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
 export host_ip=$(hostname -I | awk '{print $1}')

-export TGI_LLM_ENDPOINT=http://$host_ip:3006
 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

-export ASR_ENDPOINT=http://$host_ip:7066
-export TTS_ENDPOINT=http://$host_ip:7055
 export WAV2LIP_ENDPOINT=http://$host_ip:7860

 export MEGA_SERVICE_HOST_IP=${host_ip}
-export ASR_SERVICE_HOST_IP=${host_ip}
-export TTS_SERVICE_HOST_IP=${host_ip}
-export LLM_SERVICE_HOST_IP=${host_ip}
+export WHISPER_SERVER_HOST_IP=${host_ip}
+export WHISPER_SERVER_PORT=7066
+export SPEECHT5_SERVER_HOST_IP=${host_ip}
+export SPEECHT5_SERVER_PORT=7055
+export LLM_SERVER_HOST_IP=${host_ip}
+export LLM_SERVER_PORT=3006
 export ANIMATION_SERVICE_HOST_IP=${host_ip}
+export ANIMATION_SERVICE_PORT=3008

 export MEGA_SERVICE_PORT=8888
-export ASR_SERVICE_PORT=3001
-export TTS_SERVICE_PORT=3002
-export LLM_SERVICE_PORT=3007
-export ANIMATION_SERVICE_PORT=3008
 ```

 - Xeon CPU
@@ -124,36 +111,18 @@ curl http://${host_ip}:7066/v1/asr \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

-# asr microservice
-curl http://${host_ip}:3001/v1/audio/transcriptions \
-  -X POST \
-  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
-
 # tgi service
 curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'

-# llm microservice
-curl http://${host_ip}:3007/v1/chat/completions\
-  -X POST \
-  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-  -H 'Content-Type: application/json'
-
 # speecht5 service
 curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

-# tts microservice
-curl http://${host_ip}:3002/v1/audio/speech \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
-
 # wav2lip service
 cd ../../../..
 curl http://${host_ip}:7860/v1/wav2lip \
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -14,14 +14,6 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
-  asr:
-    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
-    container_name: asr-service
-    ports:
-      - "3001:9099"
-    ipc: host
-    environment:
-      ASR_ENDPOINT: ${ASR_ENDPOINT}
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
@@ -33,14 +25,6 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
-  tts:
-    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
-    container_name: tts-service
-    ports:
-      - "3002:9088"
-    ipc: host
-    environment:
-      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
@@ -55,21 +39,6 @@ services:
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
-  llm:
-    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
-    container_name: llm-tgi-server
-    depends_on:
-      - tgi-service
-    ports:
-      - "3007:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    restart: unless-stopped
  wav2lip-service:
    image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
    container_name: wav2lip-service
@@ -110,9 +79,6 @@ services:
    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
    container_name: avatarchatbot-xeon-backend-server
    depends_on:
-      - asr
-      - llm
-      - tts
      - animation
    ports:
      - "3009:8888"
@@ -122,12 +88,12 @@ services:
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
-      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
-      - ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
-      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
-      - TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
-      - TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
+      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
+      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
+      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
+      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
      - ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
      - ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
    ipc: host
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -14,24 +14,17 @@ cd GenAIComps
 ### 2. Build ASR Image

 ```bash
-docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
-
-
-docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
+docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
 ```

 ### 3. Build LLM Image

-```bash
-docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
-```
+Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)

 ### 4. Build TTS Image

 ```bash
-docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
-
-docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
+docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
 ```

 ### 5. Build Animation Image
@@ -55,13 +48,10 @@ docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$ht
 Then run the command `docker images`, you will have following images ready:

 1. `opea/whisper-gaudi:latest`
-2. `opea/asr:latest`
-3. `opea/llm-tgi:latest`
-4. `opea/speecht5-gaudi:latest`
-5. `opea/tts:latest`
-6. `opea/wav2lip-gaudi:latest`
-7. `opea/animation:latest`
-8. `opea/avatarchatbot:latest`
+2. `opea/speecht5-gaudi:latest`
+3. `opea/wav2lip-gaudi:latest`
+4. `opea/animation:latest`
+5. `opea/avatarchatbot:latest`

 ## 🚀 Set the environment variables

@@ -71,24 +61,21 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
 export host_ip=$(hostname -I | awk '{print $1}')

-export TGI_LLM_ENDPOINT=http://$host_ip:3006
 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

-export ASR_ENDPOINT=http://$host_ip:7066
-export TTS_ENDPOINT=http://$host_ip:7055
 export WAV2LIP_ENDPOINT=http://$host_ip:7860

 export MEGA_SERVICE_HOST_IP=${host_ip}
-export ASR_SERVICE_HOST_IP=${host_ip}
-export TTS_SERVICE_HOST_IP=${host_ip}
-export LLM_SERVICE_HOST_IP=${host_ip}
+export WHISPER_SERVER_HOST_IP=${host_ip}
+export WHISPER_SERVER_PORT=7066
+export SPEECHT5_SERVER_HOST_IP=${host_ip}
+export SPEECHT5_SERVER_PORT=7055
+export LLM_SERVER_HOST_IP=${host_ip}
+export LLM_SERVER_PORT=3006
 export ANIMATION_SERVICE_HOST_IP=${host_ip}
+export ANIMATION_SERVICE_PORT=3008

 export MEGA_SERVICE_PORT=8888
-export ASR_SERVICE_PORT=3001
-export TTS_SERVICE_PORT=3002
-export LLM_SERVICE_PORT=3007
-export ANIMATION_SERVICE_PORT=3008
 ```

 - Gaudi2 HPU
@@ -124,36 +111,18 @@ curl http://${host_ip}:7066/v1/asr \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

-# asr microservice
-curl http://${host_ip}:3001/v1/audio/transcriptions \
-  -X POST \
-  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
-
 # tgi service
 curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'

-# llm microservice
-curl http://${host_ip}:3007/v1/chat/completions\
-  -X POST \
-  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-  -H 'Content-Type: application/json'
-
 # speecht5 service
 curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

-# tts microservice
-curl http://${host_ip}:3002/v1/audio/speech \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
-
 # wav2lip service
 cd ../../../..
 curl http://${host_ip}:7860/v1/wav2lip \
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -21,14 +21,6 @@ services:
    cap_add:
      - SYS_NICE
    restart: unless-stopped
-  asr:
-    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
-    container_name: asr-service
-    ports:
-      - "3001:9099"
-    ipc: host
-    environment:
-      ASR_ENDPOINT: ${ASR_ENDPOINT}
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
    container_name: speecht5-service
@@ -45,14 +37,6 @@ services:
    cap_add:
      - SYS_NICE
    restart: unless-stopped
-  tts:
-    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
-    container_name: tts-service
-    ports:
-      - "3002:9088"
-    ipc: host
-    environment:
-      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
    container_name: tgi-gaudi-server
@@ -78,21 +62,6 @@ services:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 128 --max-total-tokens 256
-  llm:
-    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
-    container_name: llm-tgi-gaudi-server
-    depends_on:
-      - tgi-service
-    ports:
-      - "3007:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    restart: unless-stopped
  wav2lip-service:
    image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
    container_name: wav2lip-service
@@ -143,9 +112,6 @@ services:
    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
    container_name: avatarchatbot-gaudi-backend-server
    depends_on:
-      - asr
-      - llm
-      - tts
      - animation
    ports:
      - "3009:8888"
@@ -155,12 +121,12 @@ services:
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
-      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
-      - ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
-      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
-      - TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
-      - TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
+      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
+      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
+      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
+      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
      - ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
      - ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
    ipc: host