Compare commits
4 Commits
HF-TOKEN
...
update_vLL
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c712c01829 | ||
|
|
66ecaf2e6f | ||
|
|
46fab4a736 | ||
|
|
8ba40a5bff |
2
.github/env/_build_image.sh
vendored
2
.github/env/_build_image.sh
vendored
@@ -2,4 +2,4 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
export VLLM_VER=v0.8.3
|
||||
export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
export VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
|
||||
2
.github/workflows/_gmc-e2e.yml
vendored
2
.github/workflows/_gmc-e2e.yml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
- name: Run tests
|
||||
id: run-test
|
||||
env:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
run: |
|
||||
|
||||
4
.github/workflows/_helm-e2e.yml
vendored
4
.github/workflows/_helm-e2e.yml
vendored
@@ -165,8 +165,8 @@ jobs:
|
||||
env:
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
HFTOKEN: ${{ secrets.HF_TOKEN }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
value_file: ${{ matrix.value_file }}
|
||||
run: |
|
||||
set -xe
|
||||
|
||||
4
.github/workflows/_run-docker-compose.yml
vendored
4
.github/workflows/_run-docker-compose.yml
vendored
@@ -160,8 +160,8 @@ jobs:
|
||||
- name: Run test
|
||||
shell: bash
|
||||
env:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
|
||||
|
||||
@@ -13,8 +13,8 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}"
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
shm_size: 32g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
@@ -42,7 +42,7 @@ services:
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -104,7 +104,7 @@ services:
|
||||
with_memory: true
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
|
||||
@@ -10,8 +10,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
WILM_USE_TRITON_FLASH_ATTENTION: 0
|
||||
@@ -46,7 +46,7 @@ services:
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -80,7 +80,7 @@ services:
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -108,7 +108,7 @@ services:
|
||||
with_memory: true
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
|
||||
@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"
|
||||
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../../
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HF_CACHE_DIR="./data"
|
||||
export MODEL_CACHE="./data"
|
||||
@@ -39,7 +39,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
|
||||
@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"
|
||||
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../../
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HF_CACHE_DIR="./data"
|
||||
export MODEL_CACHE="./data"
|
||||
@@ -40,7 +40,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
|
||||
@@ -20,8 +20,8 @@ export CRAG_SERVER_PORT="18114"
|
||||
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../../
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HF_CACHE_DIR="./data"
|
||||
export MODEL_CACHE="./data"
|
||||
@@ -42,7 +42,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
|
||||
@@ -33,7 +33,7 @@ fi
|
||||
# retriever
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
|
||||
@@ -16,7 +16,7 @@ services:
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -50,7 +50,7 @@ services:
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -79,7 +79,7 @@ services:
|
||||
with_memory: true
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
@@ -122,7 +122,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
|
||||
@@ -16,8 +16,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
|
||||
# LLM related environment variables
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
export LLM_ENDPOINT_URL="http://${ip_address}:8086"
|
||||
|
||||
@@ -13,7 +13,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
|
||||
@@ -37,7 +37,7 @@ function build_agent_docker_image_gaudi_vllm() {
|
||||
get_genai_comps
|
||||
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||
|
||||
echo "Build agent image with --no-cache..."
|
||||
|
||||
@@ -20,7 +20,7 @@ function start_retrieval_tool() {
|
||||
cd $WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon
|
||||
host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
|
||||
@@ -11,8 +11,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=$ip_address
|
||||
echo "ip_address=${ip_address}"
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
HF_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
|
||||
export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
|
||||
|
||||
@@ -7,7 +7,7 @@ WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address"
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
|
||||
@@ -9,7 +9,7 @@ ls $WORKPATH
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
|
||||
@@ -8,7 +8,7 @@ WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID}
|
||||
|
||||
@@ -35,8 +35,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
WILM_USE_TRITON_FLASH_ATTENTION: 0
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export host_ip=${ip_address}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
export host_ip=${ip_address}
|
||||
export external_host_ip=${ip_address}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export HF_CACHE_DIR="./data"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export VLLM_SERVICE_PORT="8081"
|
||||
|
||||
@@ -36,7 +36,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
LLM_SERVER_PORT: ${LLM_SERVER_PORT}
|
||||
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
LLM_SERVER_PORT: ${LLM_SERVER_PORT}
|
||||
|
||||
@@ -36,7 +36,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_SERVER_PORT: ${LLM_SERVER_PORT}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
# export host_ip=<your External Public IP>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
|
||||
@@ -45,7 +45,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
|
||||
@@ -45,7 +45,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
# export host_ip=<your External Public IP>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
|
||||
@@ -27,7 +27,7 @@ function build_docker_images() {
|
||||
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git
|
||||
cd vllm-fork/
|
||||
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
echo "Check out vLLM tag ${VLLM_FORK_VER}"
|
||||
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ Then run the command `docker images`, you will have following images ready:
|
||||
Before starting the services with `docker compose`, you have to recheck the following environment variables.
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_SERVICE_PORT=3006
|
||||
|
||||
@@ -52,8 +52,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
|
||||
interval: 10s
|
||||
|
||||
@@ -6,7 +6,7 @@ pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
@@ -48,7 +48,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
|
||||
@@ -6,7 +6,7 @@ pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
@@ -16,7 +16,7 @@ services:
|
||||
- chatqna-redis-vector-db
|
||||
- chatqna-tei-embedding-service
|
||||
ports:
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
|
||||
@@ -16,7 +16,7 @@ services:
|
||||
- chatqna-redis-vector-db
|
||||
- chatqna-tei-embedding-service
|
||||
ports:
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
|
||||
@@ -16,7 +16,7 @@ services:
|
||||
- chatqna-redis-vector-db
|
||||
- chatqna-tei-embedding-service
|
||||
ports:
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
|
||||
@@ -16,7 +16,7 @@ services:
|
||||
- chatqna-redis-vector-db
|
||||
- chatqna-tei-embedding-service
|
||||
ports:
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||
- "${CHATQNA_REDIS_DATAPREP_PORT:-5000}:5000"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
|
||||
@@ -2,17 +2,17 @@
|
||||
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
export HOST_IP=''
|
||||
export HOST_IP_EXTERNAL=''
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
||||
export CHATQNA_NGINX_PORT=18104
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
|
||||
@@ -2,18 +2,18 @@
|
||||
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
export HOST_IP=''
|
||||
export HOST_IP_EXTERNAL=''
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_NGINX_PORT=18104
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
|
||||
@@ -2,18 +2,18 @@
|
||||
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
export HOST_IP=''
|
||||
export HOST_IP_EXTERNAL=''
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_NGINX_PORT=18104
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
|
||||
@@ -2,17 +2,17 @@
|
||||
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
export HOST_IP=''
|
||||
export HOST_IP_EXTERNAL=''
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
||||
export CHATQNA_NGINX_PORT=18104
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
|
||||
@@ -183,7 +183,7 @@ export https_proxy=${your_http_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export OLLAMA_HOST=${host_ip}
|
||||
export OLLAMA_MODEL="llama3.2"
|
||||
```
|
||||
@@ -194,7 +194,7 @@ export OLLAMA_MODEL="llama3.2"
|
||||
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||
set RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||
set INDEX_NAME=rag-redis
|
||||
set HUGGINGFACEHUB_API_TOKEN=%HF_TOKEN%
|
||||
set HUGGINGFACEHUB_API_TOKEN=%HUGGINGFACEHUB_API_TOKEN%
|
||||
set OLLAMA_HOST=host.docker.internal
|
||||
set OLLAMA_MODEL="llama3.2"
|
||||
```
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
|
||||
@@ -9,7 +9,7 @@ popd > /dev/null
|
||||
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
if [ -z "${HF_TOKEN}" ]; then
|
||||
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
|
||||
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
|
||||
fi
|
||||
|
||||
@@ -17,7 +17,7 @@ if [ -z "${host_ip}" ]; then
|
||||
echo "Error: host_ip is not set. Please set host_ip first."
|
||||
fi
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export INDEX_NAME="rag-redis"
|
||||
|
||||
@@ -31,7 +31,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -67,7 +67,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -83,7 +83,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -99,7 +99,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
VLLM_CPU_KVCACHE_SPACE: 40
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
|
||||
@@ -35,7 +35,7 @@ services:
|
||||
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR"
|
||||
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -69,7 +69,7 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MARIADBVECTOR"
|
||||
restart: unless-stopped
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -101,7 +101,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
VLLM_CPU_KVCACHE_SPACE: 40
|
||||
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
MILVUS_HOST: ${host_ip}
|
||||
MILVUS_PORT: 19530
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
healthcheck:
|
||||
@@ -107,7 +107,7 @@ services:
|
||||
MILVUS_HOST: ${host_ip}
|
||||
MILVUS_PORT: 19530
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS"
|
||||
restart: unless-stopped
|
||||
@@ -138,7 +138,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -155,7 +155,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
healthcheck:
|
||||
|
||||
@@ -20,7 +20,7 @@ services:
|
||||
PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
@@ -55,7 +55,7 @@ services:
|
||||
PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_PINECONE"
|
||||
restart: unless-stopped
|
||||
@@ -71,7 +71,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -87,7 +87,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
QDRANT_PORT: 6333
|
||||
QDRANT_INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -92,7 +92,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
|
||||
|
||||
@@ -31,7 +31,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-server
|
||||
@@ -61,7 +61,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -77,7 +77,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -92,7 +92,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
if ls *.json 1> /dev/null 2>&1; then
|
||||
rm *.json
|
||||
fi
|
||||
rm *.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/chatqna_megaservice_grafana.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/qdrant_grafana.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/milvus_grafana.json
|
||||
|
||||
@@ -7,9 +7,6 @@ pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
|
||||
@@ -7,7 +7,7 @@ pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
if [ -z "${HF_TOKEN}" ]; then
|
||||
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
|
||||
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
|
||||
fi
|
||||
|
||||
@@ -15,7 +15,7 @@ export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export MARIADB_DATABASE="vectordb"
|
||||
export MARIADB_USER="chatqna"
|
||||
export MARIADB_PASSWORD="password"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
|
||||
@@ -43,7 +43,7 @@ Some HuggingFace resources, such as some models, are only accessible if you have
|
||||
|
||||
### Configure the Deployment Environment
|
||||
|
||||
To set up environment variables for deploying ChatQnA services, source the _setup_env.sh_ script in this directory (If using faqgen or guardrails, source the _set_env_faqgen.sh_):
|
||||
To set up environment variables for deploying ChatQnA services, source the _setup_env.sh_ script in this directory:
|
||||
|
||||
```
|
||||
source ./set_env.sh
|
||||
|
||||
@@ -31,7 +31,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -67,7 +67,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:1.5.0
|
||||
@@ -101,7 +101,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
@@ -61,7 +61,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:1.5.0
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
@@ -61,7 +61,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:1.5.0
|
||||
@@ -95,7 +95,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -42,7 +42,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
GURADRAILS_MODEL_ID: ${GURADRAILS_MODEL_ID}
|
||||
@@ -73,7 +73,7 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
SAFETY_GUARD_MODEL_ID: ${GURADRAILS_MODEL_ID}
|
||||
SAFETY_GUARD_ENDPOINT: http://vllm-guardrails-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -104,7 +104,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -140,7 +140,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -96,7 +96,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -60,7 +60,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
|
||||
restart: unless-stopped
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
|
||||
@@ -123,7 +123,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
|
||||
environment:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
|
||||
@@ -4,20 +4,12 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Function to prompt for input and set environment variables
|
||||
NON_INTERACTIVE=${NON_INTERACTIVE:-false}
|
||||
|
||||
prompt_for_env_var() {
|
||||
local var_name="$1"
|
||||
local prompt_message="$2"
|
||||
local default_value="$3"
|
||||
local mandatory="$4"
|
||||
|
||||
if [[ "$NON_INTERACTIVE" == "true" ]]; then
|
||||
echo "Non-interactive environment detected. Setting $var_name to default: $default_value"
|
||||
export "$var_name"="$default_value"
|
||||
return
|
||||
fi
|
||||
|
||||
if [[ "$mandatory" == "true" ]]; then
|
||||
while [[ -z "$value" ]]; do
|
||||
read -p "$prompt_message [default: \"${default_value}\"]: " value
|
||||
@@ -42,7 +34,7 @@ popd > /dev/null
|
||||
|
||||
# Prompt the user for each required environment variable
|
||||
prompt_for_env_var "EMBEDDING_MODEL_ID" "Enter the EMBEDDING_MODEL_ID" "BAAI/bge-base-en-v1.5" false
|
||||
prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "${HF_TOKEN}" true
|
||||
prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "" true
|
||||
prompt_for_env_var "RERANK_MODEL_ID" "Enter the RERANK_MODEL_ID" "BAAI/bge-reranker-base" false
|
||||
prompt_for_env_var "LLM_MODEL_ID" "Enter the LLM_MODEL_ID" "meta-llama/Meta-Llama-3-8B-Instruct" false
|
||||
prompt_for_env_var "INDEX_NAME" "Enter the INDEX_NAME" "rag-redis" false
|
||||
@@ -50,40 +42,34 @@ prompt_for_env_var "NUM_CARDS" "Enter the number of Gaudi devices" "1" false
|
||||
prompt_for_env_var "host_ip" "Enter the host_ip" "$(curl ifconfig.me)" false
|
||||
|
||||
#Query for enabling http_proxy
|
||||
prompt_for_env_var "http_proxy" "Enter the http_proxy." "${http_proxy}" false
|
||||
prompt_for_env_var "http_proxy" "Enter the http_proxy." "" false
|
||||
|
||||
#Query for enabling https_proxy
|
||||
prompt_for_env_var "http_proxy" "Enter the http_proxy." "${https_proxy}" false
|
||||
prompt_for_env_var "https_proxy" "Enter the https_proxy." "" false
|
||||
|
||||
#Query for enabling no_proxy
|
||||
prompt_for_env_var "no_proxy" "Enter the no_proxy." "${no_proxy}" false
|
||||
prompt_for_env_var "no_proxy" "Enter the no_proxy." "" false
|
||||
|
||||
# Query for enabling logging
|
||||
if [[ "$NON_INTERACTIVE" == "true" ]]; then
|
||||
# Query for enabling logging
|
||||
prompt_for_env_var "LOGFLAG" "Enable logging? (yes/no): " "true" false
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
telemetry_flag=true
|
||||
read -p "Enable logging? (yes/no): " logging && logging=$(echo "$logging" | tr '[:upper:]' '[:lower:]')
|
||||
if [[ "$logging" == "yes" || "$logging" == "y" ]]; then
|
||||
export LOGFLAG=true
|
||||
else
|
||||
# Query for enabling logging
|
||||
read -p "Enable logging? (yes/no): " logging && logging=$(echo "$logging" | tr '[:upper:]' '[:lower:]')
|
||||
if [[ "$logging" == "yes" || "$logging" == "y" ]]; then
|
||||
export LOGFLAG=true
|
||||
else
|
||||
export LOGFLAG=false
|
||||
fi
|
||||
# Query for enabling OpenTelemetry Tracing Endpoint
|
||||
read -p "Enable OpenTelemetry Tracing Endpoint? (yes/no): " telemetry && telemetry=$(echo "$telemetry" | tr '[:upper:]' '[:lower:]')
|
||||
if [[ "$telemetry" == "yes" || "$telemetry" == "y" ]]; then
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
telemetry_flag=true
|
||||
else
|
||||
telemetry_flag=false
|
||||
fi
|
||||
export LOGFLAG=false
|
||||
fi
|
||||
|
||||
# Query for enabling OpenTelemetry Tracing Endpoint
|
||||
read -p "Enable OpenTelemetry Tracing Endpoint? (yes/no): " telemetry && telemetry=$(echo "$telemetry" | tr '[:upper:]' '[:lower:]')
|
||||
if [[ "$telemetry" == "yes" || "$telemetry" == "y" ]]; then
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
telemetry_flag=true
|
||||
pushd "grafana/dashboards" > /dev/null
|
||||
source download_opea_dashboard.sh
|
||||
popd > /dev/null
|
||||
else
|
||||
telemetry_flag=false
|
||||
fi
|
||||
|
||||
# Generate the .env file
|
||||
@@ -92,7 +78,7 @@ cat <<EOF > .env
|
||||
# Set all required ENV values
|
||||
export TAG=${TAG}
|
||||
export EMBEDDING_MODEL_ID=${EMBEDDING_MODEL_ID}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
||||
export RERANK_MODEL_ID=${RERANK_MODEL_ID}
|
||||
export LLM_MODEL_ID=${LLM_MODEL_ID}
|
||||
export INDEX_NAME=${INDEX_NAME}
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export NUM_CARDS=1
|
||||
export VLLM_SKIP_WARMUP=true
|
||||
export LOGFLAG=True
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
|
||||
|
||||
export LLM_ENDPOINT_PORT=8010
|
||||
export LLM_SERVER_PORT=9001
|
||||
export CHATQNA_BACKEND_PORT=8888
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||
export NGINX_PORT=80
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
@@ -24,7 +24,7 @@ services:
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
interval: 10s
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
@@ -98,7 +98,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
ipc: host
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
# ChatQnA E2E test scripts
|
||||
|
||||
## Set the required environment variable
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
## Run test
|
||||
|
||||
On Intel Xeon with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_tgi_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with MariaDB Vector:
|
||||
|
||||
```bash
|
||||
bash test_compose_mariadb_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with Pinecone:
|
||||
|
||||
```bash
|
||||
bash test_compose_pinecone_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with Milvus
|
||||
|
||||
```bash
|
||||
bash test_compose_milvus_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon with Qdrant
|
||||
|
||||
```bash
|
||||
bash test_compose_qdrant_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Xeon without Rerank:
|
||||
|
||||
```bash
|
||||
bash test_compose_without_rerank_on_xeon.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_tgi_on_gaudi.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_gaudi.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi with Guardrails:
|
||||
|
||||
```bash
|
||||
bash test_compose_guardrails_on_gaudi.sh
|
||||
```
|
||||
|
||||
On Intel Gaudi without Rerank:
|
||||
|
||||
```bash
|
||||
bash test_compose_without_rerank_on_gaudi.sh
|
||||
```
|
||||
|
||||
On AMD ROCm with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_on_rocm.sh
|
||||
```
|
||||
|
||||
On AMD ROCm with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_vllm_on_rocm.sh
|
||||
```
|
||||
|
||||
Test FAQ Generation On Intel Xeon with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_faqgen_tgi_on_xeon.sh
|
||||
```
|
||||
|
||||
Test FAQ Generation On Intel Xeon with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_faqgen_on_xeon.sh
|
||||
```
|
||||
|
||||
Test FAQ Generation On Intel Gaudi with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_faqgen_tgi_on_gaudi.sh
|
||||
```
|
||||
|
||||
Test FAQ Generation On Intel Gaudi with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_faqgen_on_gaudi.sh
|
||||
```
|
||||
|
||||
Test FAQ Generation On AMD ROCm with TGI:
|
||||
|
||||
```bash
|
||||
bash test_compose_faqgen_on_rocm.sh
|
||||
```
|
||||
|
||||
Test FAQ Generation On AMD ROCm with vLLM:
|
||||
|
||||
```bash
|
||||
bash test_compose_faqgen_vllm_on_rocm.sh
|
||||
```
|
||||
@@ -24,7 +24,7 @@ function build_docker_images() {
|
||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
popd && sleep 1s
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
@@ -36,7 +36,27 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
source set_env_faqgen.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export host_ip=${ip_address}
|
||||
export LLM_ENDPOINT_PORT=8010
|
||||
export LLM_SERVER_PORT=9001
|
||||
export CHATQNA_BACKEND_PORT=8888
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||
export NGINX_PORT=80
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export VLLM_SKIP_WARMUP=true
|
||||
export LOGFLAG=True
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -15,7 +15,44 @@ WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_faqgen.sh
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
||||
export CHATQNA_TGI_SERVICE_PORT=18008
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP}
|
||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
||||
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
||||
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_LLM_ENDPOINT="http://${HOST_IP}:${CHATQNA_TGI_SERVICE_PORT}"
|
||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
||||
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
||||
export CHATQNA_INDEX_NAME="rag-redis"
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
||||
|
||||
export PATH="~/miniconda3/bin:$PATH"
|
||||
|
||||
|
||||
@@ -37,16 +37,26 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export host_ip=${ip_address}
|
||||
export LLM_ENDPOINT_PORT=8010
|
||||
export LLM_SERVER_PORT=9001
|
||||
export CHATQNA_BACKEND_PORT=8888
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||
export NGINX_PORT=80
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export VLLM_SKIP_WARMUP=true
|
||||
export LOGFLAG=True
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
|
||||
source set_env.sh
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -33,8 +33,25 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export host_ip=${ip_address}
|
||||
export LLM_ENDPOINT_PORT=8010
|
||||
export LLM_SERVER_PORT=9001
|
||||
export CHATQNA_BACKEND_PORT=8888
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||
export NGINX_PORT=80
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
||||
source set_env_faqgen.sh
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export LOGFLAG=True
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -37,16 +37,25 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export host_ip=${ip_address}
|
||||
export LLM_ENDPOINT_PORT=8010
|
||||
export LLM_SERVER_PORT=9001
|
||||
export CHATQNA_BACKEND_PORT=8888
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||
export NGINX_PORT=80
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export LOGFLAG=True
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
|
||||
source set_env.sh
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -14,7 +14,41 @@ WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
||||
export CHATQNA_VLLM_SERVICE_PORT=18008
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP_EXTERNAL}
|
||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
||||
export LLM_ENDPOINT="http://${HOST_IP}:${CHATQNA_VLLM_SERVICE_PORT}"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
||||
export CHATQNA_INDEX_NAME="rag-redis"
|
||||
export CHATQNA_TYPE="CHATQNA_FAQGEN"
|
||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
||||
|
||||
function build_docker_images() {
|
||||
opea_branch=${opea_branch:-"main"}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -24,7 +24,7 @@ function build_docker_images() {
|
||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
popd && sleep 1s
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
@@ -36,8 +36,14 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
|
||||
source set_env_faqgen.sh
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_guardrails.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2025 MariaDB Foundation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -39,8 +39,14 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
export MARIADB_DATABASE="vectordb"
|
||||
export MARIADB_USER="chatqna"
|
||||
export MARIADB_PASSWORD="test"
|
||||
source set_env_mariadb.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_mariadb.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -39,8 +39,11 @@ function build_docker_images() {
|
||||
}
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LOGFLAG=true
|
||||
source set_env.sh
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_milvus.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -24,7 +24,7 @@ function build_docker_images() {
|
||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
popd && sleep 1s
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
@@ -36,10 +36,16 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export NON_INTERACTIVE=true
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
export telemetry=yes
|
||||
source set_env.sh
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -15,7 +15,41 @@ WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env.sh
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
||||
export CHATQNA_TGI_SERVICE_PORT=18008
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP}
|
||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
||||
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
||||
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
||||
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
||||
export CHATQNA_INDEX_NAME="rag-redis"
|
||||
|
||||
export PATH="~/miniconda3/bin:$PATH"
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -40,7 +40,15 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
|
||||
source set_env.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -41,11 +41,14 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
export no_proxy=${no_proxy},${ip_address}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST}
|
||||
export PINECONE_INDEX_NAME="langchain-test"
|
||||
export INDEX_NAME="langchain-test"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LOGFLAG=true
|
||||
source set_env.sh
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -40,8 +40,11 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-qdrant"
|
||||
source set_env.sh
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -32,10 +32,15 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export NON_INTERACTIVE=true
|
||||
export host_ip=${ip_address}
|
||||
export telemetry=yes
|
||||
source set_env.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -33,7 +33,14 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
|
||||
source set_env.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -14,7 +14,42 @@ WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_vllm.sh
|
||||
export HOST_IP=${ip_address}
|
||||
export HOST_IP_EXTERNAL=${ip_address}
|
||||
|
||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||
export CHATQNA_NGINX_PORT=80
|
||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
||||
export CHATQNA_VLLM_SERVICE_PORT=18008
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP_EXTERNAL}
|
||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
||||
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
||||
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
||||
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
|
||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
||||
|
||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
||||
export CHATQNA_INDEX_NAME="rag-redis"
|
||||
|
||||
|
||||
function build_docker_images() {
|
||||
opea_branch=${opea_branch:-"main"}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -24,7 +24,7 @@ function build_docker_images() {
|
||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
popd && sleep 1s
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||
VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
|
||||
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
@@ -36,8 +36,11 @@ function build_docker_images() {
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export NON_INTERACTIVE=true
|
||||
source set_env.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
@@ -41,7 +41,10 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
|
||||
source set_env.sh
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
@@ -5,14 +5,14 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
### The IP address or domain name of the server on which the application is running
|
||||
export HOST_IP=${ip_address}
|
||||
export EXTERNAL_HOST_IP=${ip_address}
|
||||
export HOST_IP=''
|
||||
export EXTERNAL_HOST_IP=''
|
||||
|
||||
### The port of the TGI service. On this port, the TGI service will accept connections
|
||||
export CODEGEN_TGI_SERVICE_PORT=8028
|
||||
|
||||
### A token for accessing repositories with models
|
||||
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
### Model ID
|
||||
export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
@@ -27,7 +27,7 @@ export CODEGEN_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}"
|
||||
export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
|
||||
### The port for CodeGen backend service
|
||||
export CODEGEN_BACKEND_SERVICE_PORT=7778
|
||||
export CODEGEN_BACKEND_SERVICE_PORT=18150
|
||||
|
||||
### The URL of CodeGen backend service, used by the frontend service
|
||||
export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
|
||||
@@ -36,4 +36,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
|
||||
export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
|
||||
### The CodeGen service UI port
|
||||
export CODEGEN_UI_SERVICE_PORT=5173
|
||||
export CODEGEN_UI_SERVICE_PORT=18151
|
||||
|
||||
@@ -5,15 +5,15 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
### The IP address or domain name of the server on which the application is running
|
||||
export HOST_IP=${ip_address}
|
||||
export EXTERNAL_HOST_IP=${ip_address}
|
||||
export HOST_IP=''
|
||||
export EXTERNAL_HOST_IP=''
|
||||
|
||||
### The port of the vLLM service. On this port, the TGI service will accept connections
|
||||
export CODEGEN_VLLM_SERVICE_PORT=8028
|
||||
export CODEGEN_VLLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_VLLM_SERVICE_PORT}"
|
||||
|
||||
### A token for accessing repositories with models
|
||||
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
### Model ID
|
||||
export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
@@ -25,7 +25,7 @@ export CODEGEN_LLM_SERVICE_PORT=9000
|
||||
export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||
|
||||
### The port for CodeGen backend service
|
||||
export CODEGEN_BACKEND_SERVICE_PORT=7778
|
||||
export CODEGEN_BACKEND_SERVICE_PORT=18150
|
||||
|
||||
### The URL of CodeGen backend service, used by the frontend service
|
||||
export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
|
||||
@@ -34,4 +34,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
|
||||
export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||
|
||||
### The CodeGen service UI port
|
||||
export CODEGEN_UI_SERVICE_PORT=5173
|
||||
export CODEGEN_UI_SERVICE_PORT=18151
|
||||
|
||||
@@ -6,10 +6,22 @@ This README provides instructions for deploying the CodeGen application using Do
|
||||
|
||||
- [Overview](#overview)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Quick Start Deployment](#quick-start-deployment)
|
||||
- [Quick Start](#quick-start)
|
||||
- [Available Deployment Options](#available-deployment-options)
|
||||
- [Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)](#default-vllm-based-deployment---profile-codegen-xeon-vllm)
|
||||
- [TGI-based Deployment (`--profile codegen-xeon-tgi`)](#tgi-based-deployment---profile-codegen-xeon-tgi)
|
||||
- [Configuration Parameters](#configuration-parameters)
|
||||
- [Environment Variables](#environment-variables)
|
||||
- [Compose Profiles](#compose-profiles)
|
||||
- [Building Custom Images (Optional)](#building-custom-images-optional)
|
||||
- [Validate Services](#validate-services)
|
||||
- [Check Container Status](#check-container-status)
|
||||
- [Run Validation Script/Commands](#run-validation-scriptcommands)
|
||||
- [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
|
||||
- [Gradio UI (Default)](#gradio-ui-default)
|
||||
- [Svelte UI (Optional)](#svelte-ui-optional)
|
||||
- [React UI (Optional)](#react-ui-optional)
|
||||
- [VS Code Extension (Optional)](#vs-code-extension-optional)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Stopping the Application](#stopping-the-application)
|
||||
- [Next Steps](#next-steps)
|
||||
@@ -31,37 +43,27 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
|
||||
cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon
|
||||
```
|
||||
|
||||
## Quick Start Deployment
|
||||
## Quick Start
|
||||
|
||||
This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
|
||||
|
||||
1. **Configure Environment:**
|
||||
Set required environment variables in your shell:
|
||||
|
||||
```bash
|
||||
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
|
||||
export HOST_IP="your_external_ip_address"
|
||||
# Replace with your Hugging Face Hub API token
|
||||
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
|
||||
```bash
|
||||
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
|
||||
export HOST_IP="your_external_ip_address"
|
||||
# Replace with your Hugging Face Hub API token
|
||||
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
|
||||
|
||||
# Optional: Configure proxy if needed
|
||||
# export http_proxy="your_http_proxy"
|
||||
# export https_proxy="your_https_proxy"
|
||||
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
|
||||
source ../../set_env.sh
|
||||
```
|
||||
# Optional: Configure proxy if needed
|
||||
# export http_proxy="your_http_proxy"
|
||||
# export https_proxy="your_https_proxy"
|
||||
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
|
||||
source ../../../set_env.sh
|
||||
```
|
||||
|
||||
_Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
||||
|
||||
For instance, edit the set_env.sh to change the LLM model
|
||||
|
||||
```
|
||||
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
```
|
||||
can be changed to other model if needed
|
||||
```
|
||||
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
|
||||
```
|
||||
_Note: The compose file might read additional variables from a `.env` file or expect them defined elsewhere. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
||||
|
||||
2. **Start Services (vLLM Profile):**
|
||||
|
||||
@@ -72,17 +74,17 @@ This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
|
||||
3. **Validate:**
|
||||
Wait several minutes for models to download (especially the first time) and services to initialize. Check container logs (`docker compose logs -f <service_name>`) or proceed to the validation steps below.
|
||||
|
||||
### Available Deployment Options
|
||||
## Available Deployment Options
|
||||
|
||||
The `compose.yaml` file uses Docker Compose profiles to select the LLM serving backend.
|
||||
|
||||
#### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)
|
||||
### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)
|
||||
|
||||
- **Profile:** `codegen-xeon-vllm`
|
||||
- **Description:** Uses vLLM optimized for Intel CPUs as the LLM serving engine. This is the default profile used in the Quick Start.
|
||||
- **Services Deployed:** `codegen-vllm-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
|
||||
|
||||
#### TGI-based Deployment (`--profile codegen-xeon-tgi`)
|
||||
### TGI-based Deployment (`--profile codegen-xeon-tgi`)
|
||||
|
||||
- **Profile:** `codegen-xeon-tgi`
|
||||
- **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine.
|
||||
@@ -93,24 +95,24 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
|
||||
docker compose --profile codegen-xeon-tgi up -d
|
||||
```
|
||||
|
||||
### Configuration Parameters
|
||||
## Configuration Parameters
|
||||
|
||||
#### Environment Variables
|
||||
### Environment Variables
|
||||
|
||||
Key parameters are configured via environment variables set before running `docker compose up`.
|
||||
|
||||
| Environment Variable | Description | Default (Set Externally) |
|
||||
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------- | ------------------------------------ |
|
||||
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
||||
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
||||
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
||||
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
||||
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-vllm | tgi-server:9000/v1/chat/completions` |
|
||||
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
||||
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
||||
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
|
||||
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
||||
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
||||
| Environment Variable | Description | Default (Set Externally) |
|
||||
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
|
||||
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
||||
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
||||
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
||||
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
||||
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
|
||||
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
||||
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
||||
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
|
||||
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
||||
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
||||
|
||||
Most of these parameters are in `set_env.sh`, you can either modify this file or overwrite the env variables by setting them.
|
||||
|
||||
@@ -118,7 +120,7 @@ Most of these parameters are in `set_env.sh`, you can either modify this file or
|
||||
source CodeGen/docker_compose/set_env.sh
|
||||
```
|
||||
|
||||
#### Compose Profiles
|
||||
### Compose Profiles
|
||||
|
||||
Docker Compose profiles (`codegen-xeon-vllm`, `codegen-xeon-tgi`) control which LLM serving backend (vLLM or TGI) and its associated dependencies are started. Only one profile should typically be active.
|
||||
|
||||
@@ -150,11 +152,11 @@ Check logs for specific services: `docker compose logs <service_name>`
|
||||
|
||||
Use `curl` commands to test the main service endpoints. Ensure `HOST_IP` is correctly set in your environment.
|
||||
|
||||
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**
|
||||
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 8000 internally, exposed differently):**
|
||||
|
||||
```bash
|
||||
# This command structure targets the OpenAI-compatible vLLM endpoint
|
||||
curl http://${HOST_IP}:9000/v1/chat/completions \
|
||||
curl http://${HOST_IP}:8000/v1/chat/completions \
|
||||
-X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
|
||||
@@ -177,8 +179,8 @@ Multiple UI options can be configured via the `compose.yaml`.
|
||||
### Gradio UI (Default)
|
||||
|
||||
Access the default Gradio UI by navigating to:
|
||||
`http://{HOST_IP}:5173`
|
||||
_(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_
|
||||
`http://{HOST_IP}:8080`
|
||||
_(Port `8080` is the default host mapping for `codegen-gradio-ui-server`)_
|
||||
|
||||

|
||||

|
||||
|
||||
@@ -17,7 +17,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
host_ip: ${host_ip}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
|
||||
@@ -39,7 +39,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
host_ip: ${host_ip}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
|
||||
@@ -56,7 +56,7 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
LLM_ENDPOINT: ${LLM_ENDPOINT}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
llm-tgi-service:
|
||||
extends: llm-base
|
||||
@@ -140,7 +140,7 @@ services:
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${host_ip}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: true
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||
@@ -162,7 +162,7 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
host_ip: ${host_ip}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:80/health"]
|
||||
interval: 10s
|
||||
@@ -202,7 +202,7 @@ services:
|
||||
REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${host_ip}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: true
|
||||
restart: unless-stopped
|
||||
tei-embedding-serving:
|
||||
@@ -76,7 +76,7 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
host_ip: ${host_ip}
|
||||
HF_TOKEN: ${HF_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]
|
||||
interval: 10s
|
||||
@@ -116,7 +116,7 @@ services:
|
||||
REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -6,10 +6,23 @@ This README provides instructions for deploying the CodeGen application using Do
|
||||
|
||||
- [Overview](#overview)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Quick Start Deployment](#quick-start-deployment)
|
||||
- [Quick Start](#quick-start)
|
||||
- [Available Deployment Options](#available-deployment-options)
|
||||
- [Default: vLLM-based Deployment (`--profile codegen-gaudi-vllm`)](#default-vllm-based-deployment---profile-codegen-gaudi-vllm)
|
||||
- [TGI-based Deployment (`--profile codegen-gaudi-tgi`)](#tgi-based-deployment---profile-codegen-gaudi-tgi)
|
||||
- [Configuration Parameters](#configuration-parameters)
|
||||
- [Environment Variables](#environment-variables)
|
||||
- [Compose Profiles](#compose-profiles)
|
||||
- [Docker Compose Gaudi Configuration](#docker-compose-gaudi-configuration)
|
||||
- [Building Custom Images (Optional)](#building-custom-images-optional)
|
||||
- [Validate Services](#validate-services)
|
||||
- [Check Container Status](#check-container-status)
|
||||
- [Run Validation Script/Commands](#run-validation-scriptcommands)
|
||||
- [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
|
||||
- [Gradio UI (Default)](#gradio-ui-default)
|
||||
- [Svelte UI (Optional)](#svelte-ui-optional)
|
||||
- [React UI (Optional)](#react-ui-optional)
|
||||
- [VS Code Extension (Optional)](#vs-code-extension-optional)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Stopping the Application](#stopping-the-application)
|
||||
- [Next Steps](#next-steps)
|
||||
@@ -31,7 +44,7 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
|
||||
cd GenAIExamples/CodeGen/docker_compose/intel/hpu/gaudi
|
||||
```
|
||||
|
||||
## Quick Start Deployment
|
||||
## Quick Start
|
||||
|
||||
This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).
|
||||
|
||||
@@ -48,21 +61,10 @@ This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).
|
||||
# export http_proxy="your_http_proxy"
|
||||
# export https_proxy="your_https_proxy"
|
||||
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
|
||||
source ../../set_env.sh
|
||||
source ../../../set_env.sh
|
||||
```
|
||||
|
||||
_Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
||||
For instance, edit the set_env.sh to change the LLM model
|
||||
|
||||
```
|
||||
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
```
|
||||
|
||||
can be changed to other model if needed
|
||||
|
||||
```
|
||||
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
|
||||
```
|
||||
_Note: Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
||||
|
||||
2. **Start Services (vLLM Profile):**
|
||||
|
||||
@@ -102,18 +104,18 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
|
||||
|
||||
Key parameters are configured via environment variables set before running `docker compose up`.
|
||||
|
||||
| Environment Variable | Description | Default (Set Externally) |
|
||||
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------- | ------------------------------------ |
|
||||
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
||||
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
||||
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
||||
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
||||
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `llm-codegen-vllm-server`). Configured in `compose.yaml`. | http://codegen-vllm | tgi-server:9000/v1/chat/completions` |
|
||||
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
||||
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
||||
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
|
||||
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
||||
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
||||
| Environment Variable | Description | Default (Set Externally) |
|
||||
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
|
||||
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
||||
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
||||
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
||||
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
||||
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
|
||||
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
||||
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
||||
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
|
||||
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
||||
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
||||
|
||||
Most of these parameters are in `set_env.sh`, you can either modify this file or overwrite the env variables by setting them.
|
||||
|
||||
@@ -170,11 +172,11 @@ Check logs: `docker compose logs <service_name>`. Pay attention to `vllm-gaudi-s
|
||||
|
||||
Use `curl` commands targeting the main service endpoints. Ensure `HOST_IP` is correctly set.
|
||||
|
||||
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**
|
||||
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 8000 internally, exposed differently):**
|
||||
|
||||
```bash
|
||||
# This command structure targets the OpenAI-compatible vLLM endpoint
|
||||
curl http://${HOST_IP}:9000/v1/chat/completions \
|
||||
curl http://${HOST_IP}:8000/v1/chat/completions \
|
||||
-X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
|
||||
@@ -195,8 +197,8 @@ UI options are similar to the Xeon deployment.
|
||||
### Gradio UI (Default)
|
||||
|
||||
Access the default Gradio UI:
|
||||
`http://{HOST_IP}:5173`
|
||||
_(Port `5173` is the default host mapping)_
|
||||
`http://{HOST_IP}:8080`
|
||||
_(Port `8080` is the default host mapping)_
|
||||
|
||||

|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user