Update aipc ollama docker compose and readme (#984)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
This commit is contained in:
lvliang-intel
2024-10-22 10:30:47 +08:00
committed by GitHub
parent 9438d392b4
commit 0eedbbfce0
17 changed files with 67 additions and 86 deletions


@@ -78,26 +78,27 @@ llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
Access the ollama service to verify that it is functioning correctly.
```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
curl http://${host_ip}:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama3.2",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
]
}'
```
The output should look similar to the following:
```
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
...
{"id":"chatcmpl-4","object":"chat.completion","created":1729232496,"model":"llama3.2","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"How can I assist you today? Are you looking for information, answers to a question, or just need someone to chat with? I'm here to help in any way I can."},"finish_reason":"stop"}],"usage":{"prompt_tokens":33,"completion_tokens":38,"total_tokens":71}}
```
## 🚀 Build Docker Images
@@ -122,20 +123,14 @@ export https_proxy="Your_HTTPs_Proxy"
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 2 Build LLM Image
```bash
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```
### 3. Build Dataprep Image
### 2. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
### 4. Build MegaService Docker Image
### 3. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:
@@ -146,7 +141,7 @@ cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
### 5. Build UI Docker Image
### 4. Build UI Docker Image
Build the frontend Docker image with the command below:
@@ -155,7 +150,7 @@ cd ~/OPEA/GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
### 6. Build Nginx Docker Image
### 5. Build Nginx Docker Image
```bash
cd GenAIComps
@@ -166,10 +161,9 @@ Then run the command `docker images`, you will have the following 6 Docker Image
1. `opea/dataprep-redis:latest`
2. `opea/retriever-redis:latest`
3. `opea/llm-ollama:latest`
4. `opea/chatqna:latest`
5. `opea/chatqna-ui:latest`
6. `opea/nginx:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`
## 🚀 Start Microservices
@@ -195,10 +189,10 @@ For Linux users, please run `hostname -I | awk '{print $1}'`. For Windows users,
export your_hf_api_token="Your_Huggingface_API_Token"
```
**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-aipc-backend-server,tei-embedding-service,retriever,tei-reranking-service,redis-vector-db,dataprep-redis-service
```
- Linux PC
@@ -211,7 +205,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
```
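With these values exported, a quick way to confirm that the Ollama daemon is reachable at `OLLAMA_HOST` is to list the pulled models via Ollama's `/api/tags` route. This is an optional sanity check added here for convenience, not one of the original steps:
```bash
# Should return a JSON list of local models, including the llama3.2 pulled earlier.
curl http://${OLLAMA_HOST}:11434/api/tags
```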
@@ -222,7 +216,7 @@ set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
set OLLAMA_HOST=host.docker.internal
set OLLAMA_MODEL="llama3.2"
```
@@ -277,16 +271,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
```
5. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
6. MegaService
5. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -294,7 +279,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
7. Upload RAG Files through Dataprep Microservice (Optional)
6. Upload RAG Files through Dataprep Microservice (Optional)
To chat with retrieved information, you need to upload a file using the Dataprep service.
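For reference, an upload along these lines should work; the `/v1/dataprep` route and the sample file name are illustrative, so check the dataprep README for the exact path (port 6007 matches `DATAPREP_SERVICE_PORT` in the compose file):
```bash
# Illustrative upload; replace your_file.pdf with a real document.
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
    -H "Content-Type: multipart/form-data" \
    -F "files=@./your_file.pdf"
```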
@@ -334,4 +319,4 @@ the output is:
## 🚀 Launch the UI
To access the frontend, open the following URL in your browser: http://{host_ip}:5173.
To access the frontend, open the following URL in your browser: http://{host_ip}:80.
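As a quick command-line check before opening the browser, probing the nginx entry point (assuming the default `NGINX_PORT=80`) should return the UI's HTML:
```bash
# Optional check: the first lines of the response should be the UI's HTML page.
curl -sS http://${host_ip}:80 | head -n 20
```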


@@ -72,22 +72,7 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
llm:
image: ${REGISTRY:-opea}/llm-ollama
container_name: llm-ollama
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
OLLAMA_MODEL: ${OLLAMA_MODEL}
chaqna-aipc-backend-server:
chatqna-aipc-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-aipc-backend-server
depends_on:
@@ -96,29 +81,29 @@ services:
- tei-embedding-service
- retriever
- tei-reranking-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
- MEGA_SERVICE_HOST_IP=chatqna-aipc-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=80
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=80
- LLM_SERVER_HOST_IP=llm
- LLM_SERVER_PORT=9000
- LLM_SERVER_HOST_IP=${OLLAMA_HOST}
- LLM_SERVER_PORT=11434
- LLM_MODEL=${OLLAMA_MODEL}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-aipc-ui-server:
chatqna-aipc-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-aipc-ui-server
depends_on:
- chaqna-aipc-backend-server
- chatqna-aipc-backend-server
ports:
- "5173:5173"
environment:
@@ -127,22 +112,22 @@ services:
- http_proxy=${http_proxy}
ipc: host
restart: always
chaqna-aipc-nginx-server:
chatqna-aipc-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-aipc-nginx-server
container_name: chatqna-aipc-nginx-server
depends_on:
- chaqna-aipc-backend-server
- chaqna-aipc-ui-server
- chatqna-aipc-backend-server
- chatqna-aipc-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_IP=chatqna-aipc-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_IP=chatqna-aipc-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
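
With the `llm-ollama` service removed, the backend now talks to the host-side Ollama daemon directly through `OLLAMA_HOST` on port 11434. A minimal sketch of bringing the stack up, assuming the updated compose file lives at `ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml`:
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc
docker compose up -d
docker compose ps   # the remaining containers should all reach an Up/healthy state
```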


@@ -16,5 +16,5 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"


@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
@@ -27,6 +25,9 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```
3. Set up other environment variables:
@@ -218,8 +219,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
@@ -230,6 +229,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```
3. Set up other environment variables:


@@ -167,10 +167,10 @@ export host_ip="External_Public_IP"
export your_hf_api_token="Your_Huggingface_API_Token"
```
**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-qdrant-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
```
```bash


@@ -112,6 +112,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
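
These hunks add `LLM_MODEL=${LLM_MODEL_ID}` to the backend environment, so `LLM_MODEL_ID` must be exported before `docker compose up`; a hedged example (the model name is only illustrative):
```bash
# Illustrative value; use whichever model the TGI/vLLM service actually serves.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```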


@@ -111,6 +111,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -110,6 +110,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm_service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -93,6 +93,7 @@ services:
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
@@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
```
3. Set up other environment variables:
@@ -216,8 +216,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
@@ -228,6 +226,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
```
3. Set up other environment variables:


@@ -133,6 +133,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -176,6 +176,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -128,6 +128,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -128,6 +128,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-ray-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -109,6 +109,7 @@ services:
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always