Refactor Faqgen (#1323)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
@@ -16,7 +16,7 @@ python get_context.py
### Launch FaQGen microservice
Please refer to [FaQGen microservice](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/faq-generation/tgi/langchain/README.md) to set up a microservice endpoint.
Please refer to [FaQGen microservice](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/faq-generation/README.md) to set up a microservice endpoint.
```
export FAQ_ENDPOINT="http://${your_ip}:9000/v1/faqgen"
@@ -33,6 +33,8 @@ export FAQGEN_LLM_SERVER_PORT=9000
export FAQGEN_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export FAQGEN_BACKEND_SERVER_PORT=8888
export FAGGEN_UI_PORT=5173
export LLM_ENDPOINT="http://${HOST_IP}:${FAQGEN_TGI_SERVICE_PORT}"
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
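Once the stack is up, a quick way to confirm the TGI endpoint defined above is reachable is a curl smoke test (a sketch; it assumes the exports above are in effect and the model has finished loading, and the prompt is illustrative only):

```bash
# Sketch: probe the TGI server exposed at LLM_ENDPOINT.
curl "${LLM_ENDPOINT}/health"
curl "${LLM_ENDPOINT}/generate" \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"inputs": "What is a FAQ?", "parameters": {"max_new_tokens": 32}}'
```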
@@ -14,6 +14,13 @@ services:
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${FAQGEN_TGI_SERVICE_PORT}"
HUGGINGFACEHUB_API_TOKEN: ${FAQGEN_HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${FAQGEN_HUGGINGFACEHUB_API_TOKEN}
HOST_IP: ${HOST_IP}
FAQGEN_TGI_SERVICE_PORT: ${FAQGEN_TGI_SERVICE_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${HOST_IP}:${FAQGEN_TGI_SERVICE_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
volumes:
- "/var/opea/faqgen-service/data:/data"
shm_size: 1g
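The healthcheck added above can also be run by hand from the host when a container stays in `starting` (a sketch; assumes `HOST_IP` and `FAQGEN_TGI_SERVICE_PORT` are exported as in the README):

```bash
# Same probe the compose healthcheck runs, executed from the host.
curl -f "http://${HOST_IP}:${FAQGEN_TGI_SERVICE_PORT}/health" && echo "TGI is ready"
# Docker's view of the healthcheck state for the whole stack.
docker compose ps
```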
@@ -30,10 +37,11 @@ services:
ipc: host
command: --model-id ${FAQGEN_LLM_MODEL_ID}
faqgen-llm-server:
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
container_name: faqgen-llm-server
depends_on:
- faqgen-tgi-service
faqgen-tgi-service:
condition: service_healthy
ports:
- "${FAQGEN_LLM_SERVER_PORT:-9000}:9000"
ipc: host
@@ -41,9 +49,12 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${FAQGEN_TGI_SERVICE_PORT}"
LLM_ENDPOINT: "http://${HOST_IP}:${FAQGEN_TGI_SERVICE_PORT}"
HUGGINGFACEHUB_API_TOKEN: ${FAQGEN_HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${FAQGEN_HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${FAQGEN_LLM_MODEL_ID}
FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
faqgen-backend-server:
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
@@ -19,7 +19,7 @@ First of all, you need to build Docker Images locally. This step can be ignored
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/llm-faqgen-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/langchain/Dockerfile .
docker build -t opea/llm-faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile .
```
### 2. Build MegaService Docker Image
@@ -53,7 +53,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
Then run the command `docker images`, and you will have the following Docker images:
1. `opea/llm-faqgen-tgi:latest`
1. `opea/llm-faqgen:latest`
2. `opea/faqgen:latest`
3. `opea/faqgen-ui:latest`
4. `opea/faqgen-react-ui:latest`
@@ -74,16 +74,19 @@ Since the `compose.yaml` will consume some environment variables, you need to se
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export host_ip=${your_host_ip}
export LLM_ENDPOINT_PORT=8008
export LLM_SERVICE_PORT=9000
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/faqgen"
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
Note: Please replace `your_host_ip` with your external IP address; do not use localhost.
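After the containers in the next section are up, the MegaService can be exercised end to end (a sketch; `BACKEND_SERVICE_ENDPOINT` comes from the exports above and the payload is illustrative only):

```bash
# Sketch: send a short document to the FaqGen backend and read back the generated FAQs.
curl "${BACKEND_SERVICE_ENDPOINT}" \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings are numerical representations of text semantics."}'
```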
### Start Microservice Docker Containers
@@ -6,31 +6,41 @@ services:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-xeon-server
ports:
- "8008:80"
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
volumes:
- "./data:/data"
shm_size: 1g
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm_faqgen:
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
container_name: llm-faqgen-server
depends_on:
- tgi-service
tgi-service:
condition: service_healthy
ports:
- "9000:9000"
- ${LLM_SERVICE_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
faqgen-xeon-backend-server:
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
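To confirm the rename took effect at runtime, check which image the FaQGen LLM container is actually running (a sketch; the container name comes from this compose file):

```bash
# Expect the image column to show opea/llm-faqgen:<tag>, not opea/llm-faqgen-tgi.
docker ps --filter "name=llm-faqgen-server" --format "{{.Names}}\t{{.Image}}"
```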
@@ -98,7 +98,7 @@ docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/llm-faqgen-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/langchain/Dockerfile .
docker build -t opea/llm-faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile .
```
### 3. Build MegaService Docker Image
@@ -133,7 +133,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
Then run the command `docker images`, and you will have the following Docker images:
1. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
2. `opea/llm-faqgen-tgi:latest`
2. `opea/llm-faqgen:latest`
3. `opea/faqgen:latest`
4. `opea/faqgen-ui:latest`
5. `opea/faqgen-react-ui:latest`
@@ -154,12 +154,15 @@ Since the `compose.yaml` will consume some environment variables, you need to se
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export host_ip=${your_host_ip}
export LLM_ENDPOINT_PORT=8008
export LLM_SERVICE_PORT=9000
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/faqgen"
```
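On Gaudi the TGI server can take several minutes to warm up; one way to wait for it before sending requests is to poll its log (a sketch mirroring the check used in the CI test script; the container name `tgi-gaudi-server` comes from the compose file):

```bash
# Poll the TGI log until it reports it is connected and serving.
until docker logs tgi-gaudi-server 2>&1 | grep -q Connected; do
  sleep 5
done
echo "tgi-gaudi-server is ready"
```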
@@ -6,41 +6,54 @@ services:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
ports:
- "8008:80"
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
PREFILL_BATCH_BUCKET_SIZE: 1
BATCH_BUCKET_SIZE: 8
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
healthcheck:
test: ["CMD-SHELL", "sleep 500 && exit 0"]
interval: 1s
timeout: 505s
retries: 1
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
llm_faqgen:
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
container_name: llm-faqgen-server
depends_on:
- tgi-service
tgi-service:
condition: service_healthy
ports:
- "9000:9000"
- ${LLM_SERVICE_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
faqgen-gaudi-backend-server:
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
@@ -23,9 +23,9 @@ services:
dockerfile: ./docker/Dockerfile.react
extends: faqgen
image: ${REGISTRY:-opea}/faqgen-react-ui:${TAG:-latest}
llm-faqgen-tgi:
llm-faqgen:
build:
context: GenAIComps
dockerfile: comps/llms/faq-generation/tgi/langchain/Dockerfile
dockerfile: comps/llms/src/faq-generation/Dockerfile
extends: faqgen
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="faqgen faqgen-ui llm-faqgen-tgi"
service_list="faqgen faqgen-ui llm-faqgen"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
@@ -29,28 +29,24 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export host_ip=${ip_address}
export LLM_ENDPOINT_PORT=8008
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_PORT=9000
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen"
export LOGFLAG=True
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 30s
}
function validate_services() {
@@ -23,19 +23,20 @@ export FAQGEN_CARD_ID="card1"
export FAQGEN_RENDER_ID="renderD136"
export FAQGEN_BACKEND_SERVER_PORT=8888
export FAGGEN_UI_PORT=5173
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_ENDPOINT="http://${ip_address}:8008"
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen"
export PATH="~/miniconda3/bin:$PATH"
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
export LOGFLAG=True
function build_docker_images() {
cd "$WORKPATH"/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="faqgen faqgen-ui llm-faqgen-tgi"
service_list="faqgen faqgen-ui llm-faqgen"
docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
docker images && sleep 1s
@@ -49,15 +50,7 @@ function start_services() {
# Start Docker Containers
docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs faggen-tgi-service > "${LOG_PATH}"/tgi_service_start.log
if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 30s
}
function validate_services() {
@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="faqgen faqgen-ui llm-faqgen-tgi"
service_list="faqgen faqgen-ui llm-faqgen"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:1.4
@@ -29,28 +29,24 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export host_ip=${ip_address}
export LLM_ENDPOINT_PORT=8008
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_PORT=9000
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen"
export LOGFLAG=True
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-xeon-server > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 30s
}
function validate_services() {
@@ -147,7 +147,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
test: ["CMD-SHELL", "curl -f http://${host_ip}:9009/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
@@ -259,10 +259,11 @@ services:
ipc: host
restart: always
llm_faqgen:
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
container_name: llm-faqgen-server
depends_on:
- tgi_service
tgi_service:
condition: service_healthy
ports:
- "9002:9000"
ipc: host
@@ -270,8 +271,14 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
LLM_MODEL_ID: ${LLM_MODEL_ID}
FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
faqgen-xeon-backend-server:
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
@@ -77,9 +77,9 @@ services:
context: ../../FaqGen/
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
llm-faqgen-tgi:
llm-faqgen:
build:
context: GenAIComps
dockerfile: comps/llms/faq-generation/tgi/langchain/Dockerfile
dockerfile: comps/llms/src/faq-generation/Dockerfile
extends: faqgen
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
@@ -183,7 +183,7 @@ spec:
- configMapRef:
name: faqgen-llm-uservice-config
securityContext: {}
image: "opea/llm-faqgen-tgi:latest"
image: "opea/llm-faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -80,20 +80,13 @@ function start_services() {
export LLM_SERVER_PORT=9009
export PROMPT_COLLECTION_NAME="prompt"
export host_ip=${ip_address}
export LOGFLAG=true
export FAQGen_COMPONENT_NAME="OPEAFAQGen_TGI"
export LOGFLAG=True
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
echo "ChatQnA TGI Service Connected"
break
fi
sleep 5s
n=$((n+1))
done
sleep 30s
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi_service_codegen > ${LOG_PATH}/tgi_service_codegen_start.log
@@ -69,7 +69,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
| [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
| [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
| [opea/llm-docsum-tgi](https://hub.docker.com/r/opea/llm-docsum-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/summarization/tgi/langchain/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
| [opea/llm-faqgen-tgi](https://hub.docker.com/r/opea/llm-faqgen-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/faq-generation/tgi/langchain/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon TGI docker image for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/llava/dependency/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
| [opea/lvm-tgi](https://hub.docker.com/r/opea/lvm-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/tgi-llava/Dockerfile) | This docker image is designed to build a large visual model (LVM) microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a answer to question. |