MultimodalQnA image query, pdf, dynamic ports, and UI updates (#1381)
Per the proposed changes in this [RFC](https://github.com/opea-project/docs/blob/main/community/rfcs/24-10-02-GenAIExamples-001-Image_and_Audio_Support_in_MultimodalQnA.md)'s Phase 2 plan, this PR adds support for image queries, PDF ingestion and display, and dynamic ports. There are also some bug fixes. This PR goes with [this one in GenAIComps](https://github.com/opea-project/GenAIComps/pull/1134). Signed-off-by: Melanie Buehler <melanie.h.buehler@intel.com> Co-authored-by: Liang Lv <liang1.lv@intel.com>
This commit is contained in:
committed by
GitHub
parent
f3562bef36
commit
f11ab458d8
@@ -40,6 +40,10 @@ lvm
|
||||
===
|
||||
Port 9399 - Open to 0.0.0.0/0
|
||||
|
||||
whisper
|
||||
===
|
||||
Port 7066 - Open to 0.0.0.0/0
|
||||
|
||||
dataprep-multimodal-redis
|
||||
===
|
||||
Port 6007 - Open to 0.0.0.0/0
|
||||
@@ -75,34 +79,47 @@ export your_no_proxy=${your_no_proxy},"External_Public_IP"
|
||||
export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
export BRIDGE_TOWER_EMBEDDING=true
|
||||
export LLAVA_SERVER_PORT=8399
|
||||
export LVM_ENDPOINT="http://${host_ip}:8399"
|
||||
export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc"
|
||||
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
|
||||
export WHISPER_MODEL="base"
|
||||
export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
|
||||
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
|
||||
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
|
||||
export WHISPER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr"
|
||||
export WHISPER_MODEL="base"
|
||||
export MAX_IMAGES=1
|
||||
export REDIS_DB_PORT=6379
|
||||
export REDIS_INSIGHTS_PORT=8001
|
||||
export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
export DATAPREP_MMR_PORT=5000
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest"
|
||||
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts"
|
||||
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete"
|
||||
export EMM_BRIDGETOWER_PORT=6006
|
||||
export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc"
|
||||
export BRIDGE_TOWER_EMBEDDING=true
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export REDIS_RETRIEVER_PORT=7000
|
||||
export LVM_PORT=9399
|
||||
export LLAVA_SERVER_PORT=8399
|
||||
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
|
||||
export LVM_ENDPOINT="http://${host_ip}:$LLAVA_SERVER_PORT"
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:$MEGA_SERVICE_PORT/v1/multimodalqna"
|
||||
export UI_PORT=5173
|
||||
```
|
||||
|
||||
Note: Please replace `host_ip` with your external IP address; do not use localhost.
|
||||
|
||||
> Note: The `MAX_IMAGES` environment variable is used to specify the maximum number of images that will be sent from the LVM service to the LLaVA server.
|
||||
> If an image list longer than `MAX_IMAGES` is sent to the LVM server, a shortened image list will be sent to the LLaVA service. If the image list
|
||||
> needs to be shortened, the most recent images (the ones at the end of the list) are prioritized to send to the LLaVA service. Some LLaVA models have not
|
||||
> been trained with multiple images and may lead to inaccurate results. If `MAX_IMAGES` is not set, it will default to `1`.
|
||||
|
||||
## 🚀 Build Docker Images
|
||||
|
||||
### 1. Build embedding-multimodal-bridgetower Image
|
||||
@@ -112,7 +129,7 @@ Build embedding-multimodal-bridgetower docker image
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile .
|
||||
docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMM_BRIDGETOWER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/bridgetower/src/Dockerfile .
|
||||
```
|
||||
|
||||
Build embedding microservice image
|
||||
@@ -147,7 +164,7 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy
|
||||
docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build asr images
|
||||
### 5. Build Whisper Server Image
|
||||
|
||||
Build whisper server image
|
||||
|
||||
@@ -214,14 +231,14 @@ docker compose -f compose.yaml up -d
|
||||
1. embedding-multimodal-bridgetower
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${EMBEDDER_PORT}/v1/encode \
|
||||
curl http://${host_ip}:${EMM_BRIDGETOWER_PORT}/v1/encode \
|
||||
-X POST \
|
||||
-H "Content-Type:application/json" \
|
||||
-d '{"text":"This is example"}'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${EMBEDDER_PORT}/v1/encode \
|
||||
curl http://${host_ip}:${EMM_BRIDGETOWER_PORT}/v1/encode \
|
||||
-X POST \
|
||||
-H "Content-Type:application/json" \
|
||||
-d '{"text":"This is example", "img_b64_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"}'
|
||||
@@ -247,13 +264,13 @@ curl http://${host_ip}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \
|
||||
|
||||
```bash
|
||||
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
|
||||
curl http://${host_ip}:7000/v1/multimodal_retrieval \
|
||||
curl http://${host_ip}:${REDIS_RETRIEVER_PORT}/v1/multimodal_retrieval \
|
||||
-X POST \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}"
|
||||
```
|
||||
|
||||
4. asr
|
||||
4. whisper
|
||||
|
||||
```bash
|
||||
curl ${WHISPER_SERVER_ENDPOINT} \
|
||||
@@ -274,14 +291,14 @@ curl http://${host_ip}:${LLAVA_SERVER_PORT}/generate \
|
||||
6. lvm
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9399/v1/lvm \
|
||||
curl http://${host_ip}:${LVM_PORT}/v1/lvm \
|
||||
-X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image", "video_id": "8c7461df-b373-4a00-8696-9a2234359fe0", "time_of_frame_ms":"37000000", "source_video":"WeAreGoingOnBullrun_8c7461df-b373-4a00-8696-9a2234359fe0.mp4"}], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9399/v1/lvm \
|
||||
curl http://${host_ip}:${LVM_PORT}/v1/lvm \
|
||||
-X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}'
|
||||
@@ -290,7 +307,7 @@ curl http://${host_ip}:9399/v1/lvm \
|
||||
Also, validate LVM Microservice with empty retrieval results
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9399/v1/lvm \
|
||||
curl http://${host_ip}:${LVM_PORT}/v1/lvm \
|
||||
-X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}'
|
||||
@@ -298,7 +315,7 @@ curl http://${host_ip}:9399/v1/lvm \
|
||||
|
||||
7. dataprep-multimodal-redis
|
||||
|
||||
Download a sample video, image, and audio file and create a caption
|
||||
Download a sample video, image, pdf, and audio file and create a caption
|
||||
|
||||
```bash
|
||||
export video_fn="WeAreGoingOnBullrun.mp4"
|
||||
@@ -307,6 +324,9 @@ wget http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/WeAreGoing
|
||||
export image_fn="apple.png"
|
||||
wget https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true -O ${image_fn}
|
||||
|
||||
export pdf_fn="nke-10k-2023.pdf"
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf -O ${pdf_fn}
|
||||
|
||||
export caption_fn="apple.txt"
|
||||
echo "This is an apple." > ${caption_fn}
|
||||
|
||||
@@ -314,6 +334,15 @@ export audio_fn="AudioSample.wav"
|
||||
wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav -O ${audio_fn}
|
||||
```
|
||||
|
||||
```bash
|
||||
export DATAPREP_MMR_PORT=6007
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest"
|
||||
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts"
|
||||
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete"
|
||||
```
|
||||
|
||||
Test dataprep microservice with generating transcript. This command updates a knowledge base by uploading a local video .mp4 and an audio .wav file.
|
||||
|
||||
```bash
|
||||
@@ -325,7 +354,7 @@ curl --silent --write-out "HTTPSTATUS:%{http_code}" \
|
||||
-F "files=@./${audio_fn}"
|
||||
```
|
||||
|
||||
Also, test dataprep microservice with generating an image caption using lvm microservice
|
||||
Also, test dataprep microservice with generating an image caption using lvm microservice.
|
||||
|
||||
```bash
|
||||
curl --silent --write-out "HTTPSTATUS:%{http_code}" \
|
||||
@@ -334,13 +363,14 @@ curl --silent --write-out "HTTPSTATUS:%{http_code}" \
|
||||
-X POST -F "files=@./${image_fn}"
|
||||
```
|
||||
|
||||
Now, test the microservice with posting a custom caption along with an image
|
||||
Now, test the microservice with posting a custom caption along with an image and a PDF containing images and text.
|
||||
|
||||
```bash
|
||||
curl --silent --write-out "HTTPSTATUS:%{http_code}" \
|
||||
${DATAPREP_INGEST_SERVICE_ENDPOINT} \
|
||||
-H 'Content-Type: multipart/form-data' \
|
||||
-X POST -F "files=@./${image_fn}" -F "files=@./${caption_fn}"
|
||||
-X POST -F "files=@./${image_fn}" -F "files=@./${caption_fn}" \
|
||||
-F "files=@./${pdf_fn}"
|
||||
```
|
||||
|
||||
Also, you are able to get the list of all files that you uploaded:
|
||||
@@ -358,7 +388,8 @@ Then you will get the response python-style LIST like this. Notice the name of e
|
||||
"WeAreGoingOnBullrun_7ac553a1-116c-40a2-9fc5-deccbb89b507.mp4",
|
||||
"WeAreGoingOnBullrun_6d13cf26-8ba2-4026-a3a9-ab2e5eb73a29.mp4",
|
||||
"apple_fcade6e6-11a5-44a2-833a-3e534cbe4419.png",
|
||||
"AudioSample_976a85a6-dc3e-43ab-966c-9d81beef780c.wav
|
||||
"nke-10k-2023_28000757-5533-4b1b-89fe-7c0a1b7e2cd0.pdf",
|
||||
"AudioSample_976a85a6-dc3e-43ab-966c-9d81beef780c.wav"
|
||||
]
|
||||
```
|
||||
|
||||
@@ -372,21 +403,41 @@ curl -X POST \
|
||||
|
||||
8. MegaService
|
||||
|
||||
Test the MegaService with a text query:
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/multimodalqna \
|
||||
curl http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST \
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
```
|
||||
|
||||
Test the MegaService with an audio query:
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/multimodalqna \
|
||||
curl http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"messages": [{"role": "user", "content": [{"type": "audio", "audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}]}]}'
|
||||
```
|
||||
|
||||
Test the MegaService with a text and image query:
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"messages": [{"role": "user", "content": [{"type": "text", "text": "Green bananas in a tree"}, {"type": "image_url", "image_url": {"url": "http://images.cocodataset.org/test-stuff2017/000000004248.jpg"}}]}]}'
|
||||
```
|
||||
|
||||
Test the MegaService with a back and forth conversation between the user and assistant:
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"messages": [{"role": "user", "content": [{"type": "audio", "audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}]}]}'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/multimodalqna \
|
||||
curl http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"messages": [{"role": "user", "content": [{"type": "text", "text": "hello, "}, {"type": "image_url", "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}}]}, {"role": "assistant", "content": "opea project! "}, {"role": "user", "content": "chao, "}], "max_tokens": 10}'
|
||||
```
|
||||
|
||||
@@ -6,7 +6,7 @@ services:
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
- "${WHISPER_PORT}:7066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
@@ -17,8 +17,8 @@ services:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
ports:
|
||||
- "6379:6379"
|
||||
- "8001:8001"
|
||||
- "${REDIS_DB_PORT}:${REDIS_DB_PORT}"
|
||||
- "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}"
|
||||
dataprep-multimodal-redis:
|
||||
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
|
||||
container_name: dataprep-multimodal-redis
|
||||
@@ -26,15 +26,16 @@ services:
|
||||
- redis-vector-db
|
||||
- lvm-llava
|
||||
ports:
|
||||
- "6007:5000"
|
||||
- "6007:${DATAPREP_MMR_PORT}"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm"
|
||||
LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm"
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
MULTIMODAL_DATAPREP: true
|
||||
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
|
||||
@@ -43,14 +44,15 @@ services:
|
||||
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}
|
||||
container_name: embedding-multimodal-bridgetower
|
||||
ports:
|
||||
- ${EMBEDDER_PORT}:${EMBEDDER_PORT}
|
||||
- ${EMM_BRIDGETOWER_PORT}:${EMM_BRIDGETOWER_PORT}
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
PORT: ${EMBEDDER_PORT}
|
||||
EMM_BRIDGETOWER_PORT: ${EMM_BRIDGETOWER_PORT}
|
||||
PORT: ${EMM_BRIDGETOWER_PORT}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"]
|
||||
test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMM_BRIDGETOWER_PORT}/v1/health_check"]
|
||||
interval: 10s
|
||||
timeout: 6s
|
||||
retries: 18
|
||||
@@ -80,13 +82,16 @@ services:
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
ports:
|
||||
- "7000:7000"
|
||||
- "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_DB_PORT: ${REDIS_DB_PORT}
|
||||
REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT}
|
||||
REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
BRIDGE_TOWER_EMBEDDING: ${BRIDGE_TOWER_EMBEDDING}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
@@ -96,11 +101,13 @@ services:
|
||||
image: ${REGISTRY:-opea}/lvm-llava:${TAG:-latest}
|
||||
container_name: lvm-llava
|
||||
ports:
|
||||
- "8399:8399"
|
||||
- "${LLAVA_SERVER_PORT}:${LLAVA_SERVER_PORT}"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
LLAVA_SERVER_PORT: ${LLAVA_SERVER_PORT}
|
||||
LVM_PORT: ${LVM_PORT}
|
||||
entrypoint: ["python", "llava_server.py", "--device", "cpu", "--model_name_or_path", $LVM_MODEL_ID]
|
||||
restart: unless-stopped
|
||||
lvm:
|
||||
@@ -109,7 +116,7 @@ services:
|
||||
depends_on:
|
||||
- lvm-llava
|
||||
ports:
|
||||
- "9399:9399"
|
||||
- "${LVM_PORT}:${LVM_PORT}"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
@@ -117,6 +124,9 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
LVM_COMPONENT_NAME: "OPEA_LLAVA_LVM"
|
||||
LVM_ENDPOINT: ${LVM_ENDPOINT}
|
||||
LLAVA_SERVER_PORT: ${LLAVA_SERVER_PORT}
|
||||
LVM_PORT: ${LVM_PORT}
|
||||
MAX_IMAGES: ${MAX_IMAGES:-1}
|
||||
restart: unless-stopped
|
||||
multimodalqna:
|
||||
image: ${REGISTRY:-opea}/multimodalqna:${TAG:-latest}
|
||||
@@ -128,17 +138,19 @@ services:
|
||||
- retriever-redis
|
||||
- lvm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
- "${MEGA_SERVICE_PORT}:${MEGA_SERVICE_PORT}"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
|
||||
MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT}
|
||||
MM_EMBEDDING_SERVICE_HOST_IP: ${MM_EMBEDDING_SERVICE_HOST_IP}
|
||||
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
|
||||
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
|
||||
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
|
||||
WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
|
||||
LVM_MODEL_ID: ${LVM_MODEL_ID}
|
||||
WHISPER_PORT: ${WHISPER_PORT}
|
||||
WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
@@ -148,7 +160,7 @@ services:
|
||||
depends_on:
|
||||
- multimodalqna
|
||||
ports:
|
||||
- "5173:5173"
|
||||
- "${UI_PORT}:${UI_PORT}"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
@@ -157,6 +169,9 @@ services:
|
||||
- DATAPREP_INGEST_SERVICE_ENDPOINT=${DATAPREP_INGEST_SERVICE_ENDPOINT}
|
||||
- DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT=${DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT}
|
||||
- DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT=${DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT}
|
||||
      - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
|
||||
- UI_PORT=${UI_PORT}
|
||||
- DATAPREP_MMR_PORT=${DATAPREP_MMR_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -6,30 +6,49 @@ pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
export BRIDGE_TOWER_EMBEDDING=true
|
||||
export LLAVA_SERVER_PORT=8399
|
||||
export LVM_ENDPOINT="http://${host_ip}:8399"
|
||||
export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc"
|
||||
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
|
||||
export WHISPER_MODEL="base"
|
||||
|
||||
export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
|
||||
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
|
||||
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
|
||||
|
||||
export WHISPER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr"
|
||||
export WHISPER_MODEL="base"
|
||||
export MAX_IMAGES=1
|
||||
|
||||
export REDIS_DB_PORT=6379
|
||||
export REDIS_INSIGHTS_PORT=8001
|
||||
export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
|
||||
export DATAPREP_MMR_PORT=5000
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest"
|
||||
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts"
|
||||
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete"
|
||||
|
||||
export EMM_BRIDGETOWER_PORT=6006
|
||||
export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc"
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export BRIDGE_TOWER_EMBEDDING=true
|
||||
|
||||
export REDIS_RETRIEVER_PORT=7000
|
||||
|
||||
export LVM_PORT=9399
|
||||
export LLAVA_SERVER_PORT=8399
|
||||
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
|
||||
export LVM_ENDPOINT="http://${host_ip}:${LLAVA_SERVER_PORT}"
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna"
|
||||
|
||||
export UI_PORT=5173
|
||||
|
||||
Reference in New Issue
Block a user