Update Code and README for GenAIComps Refactor (#1285)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: ZePan110 <ze.pan@intel.com>
Signed-off-by: WenjiaoYue <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>
@@ -13,7 +13,7 @@ First of all, you need to build Docker Images locally and install the python pac
 ```bash
 git clone https://github.com/opea-project/GenAIComps.git
 cd GenAIComps
-docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
+docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile .
 ```

 ### 2. Build Retriever Image
@@ -25,7 +25,7 @@ docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$
 ### 3. Build Rerank Image

 ```bash
-docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
+docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/src/Dockerfile .
 ```

 ### 4. Build LLM Image
@@ -33,7 +33,7 @@ docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$ht
 #### Use TGI as backend

 ```bash
-docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
+docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
 ```

 ### 5. Build Dataprep Image

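
If the builds succeed, the refactored Dockerfile paths produce the same image tags as before, so a quick look at the local image list is enough to confirm them; the tag names below are simply the defaults used in the commands above.

```bash
# Confirm the rebuilt OPEA images exist locally
docker images | grep -E 'opea/(embedding-tei|reranking-tei|llm-tgi)'
```
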
@@ -28,10 +28,13 @@ services:
       REDIS_URL: ${REDIS_URL}
       REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
-      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      DATAPREP_TYPE: ${DATAPREP_TYPE}
+      LOGFLAG: ${LOGFLAG}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
     container_name: tei-embedding-server
     ports:
       - "6006:80"
@@ -42,12 +45,18 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:6006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 60
   embedding:
     image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
     container_name: embedding-tei-server
     depends_on:
-      - tei-embedding-service
+      tei-embedding-service:
+        condition: service_healthy
     ports:
       - "6000:6000"
     ipc: host
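
With the healthcheck and the `condition: service_healthy` dependency above, the embedding microservice only starts once the TEI endpoint answers on `/health`. The same probe can be run by hand; the host IP, port and container name are the ones wired into this compose file.

```bash
# Probe TEI the same way the compose healthcheck does
curl -f "http://${host_ip}:6006/health" && echo "tei-embedding-service is healthy"

# Or ask Docker for the health state it tracks for the container
docker inspect --format '{{.State.Health.Status}}' tei-embedding-server
```
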
@@ -56,9 +65,8 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-embedding-service"
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -77,9 +85,12 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      RETRIEVER_TYPE: ${RETRIEVER_TYPE}
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tei-reranking-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${RERANK_MODEL_ID} --auto-truncate"
     container_name: tei-reranking-server
     ports:
       - "8808:80"
@@ -93,12 +104,18 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8808/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 60
   reranking:
     image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
     container_name: reranking-tei-xeon-server
     depends_on:
-      - tei-reranking-service
+      tei-reranking-service:
+        condition: service_healthy
     ports:
       - "8000:8000"
     ipc: host
@@ -106,13 +123,12 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      RERANK_TYPE: ${RERANK_TYPE}
       TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-reranking-service"
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tgi_service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
@@ -129,12 +145,19 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server
     depends_on:
-      - tgi_service
+      tgi_service:
+        condition: service_healthy
     ports:
       - "9000:9000"
     ipc: host
@@ -142,13 +165,12 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-llm-service"
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
@@ -194,12 +216,19 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 30
     command: --model-id ${LLM_MODEL_ID_CODEGEN}
   llm_codegen:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server-codegen
     depends_on:
-      - tgi_service_codegen
+      tgi_service_codegen:
+        condition: service_healthy
     ports:
       - "9001:9000"
     ipc: host
@@ -207,11 +236,10 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-llm-service"
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
@@ -227,6 +255,7 @@ services:
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
       LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN}
       LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN}
+      LOGFLAG: ${LOGFLAG}
     ipc: host
     restart: always
   llm_faqgen:
@@ -243,9 +272,6 @@ services:
       https_proxy: ${https_proxy}
       TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-llm-service"
     restart: unless-stopped
   faqgen-xeon-backend-server:
     image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
@@ -262,41 +288,7 @@ services:
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
       LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN}
       LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN}
-    ipc: host
-    restart: always
-  llm_docsum_server:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
-    container_name: llm-docsum-server
-    depends_on:
-      - tgi_service
-    ports:
-      - "9003:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-llm-service"
-    restart: unless-stopped
-  docsum-xeon-backend-server:
-    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
-    container_name: docsum-xeon-backend-server
-    depends_on:
-      - tgi_service
-      - llm_docsum_server
-    ports:
-      - "8890:8888"
-    environment:
-      no_proxy: ${no_proxy}
-      https_proxy: ${https_proxy}
-      http_proxy: ${http_proxy}
-      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM}
-      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
-      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM}
+      LOGFLAG: ${LOGFLAG}
     ipc: host
     restart: always
   mongo:
@@ -323,6 +315,7 @@ services:
       MONGO_HOST: ${MONGO_HOST}
       MONGO_PORT: ${MONGO_PORT}
       COLLECTION_NAME: ${COLLECTION_NAME}
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped

   promptregistry-mongo:
@@ -338,6 +331,7 @@ services:
       MONGO_HOST: ${MONGO_HOST}
       MONGO_PORT: ${MONGO_PORT}
       COLLECTION_NAME: ${PROMPT_COLLECTION_NAME}
+      LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   keycloak:
     image: quay.io/keycloak/keycloak:25.0.2
@@ -371,6 +365,7 @@ services:
       - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT}
       - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT}
       - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT}
+      - LOGFLAG=${LOGFLAG}
     ipc: host
     restart: always
 networks:

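
The updated compose file reads several new variables from the shell (`DATAPREP_TYPE`, `RETRIEVER_TYPE`, `RERANK_TYPE`, `LOGFLAG`, `host_ip`); if they are not exported before `docker compose up -d`, the corresponding settings silently resolve to empty strings. A minimal export block, using the same values the test script below sets (the `host_ip` detection shown is just one common way to pick a reachable address):

```bash
export DATAPREP_TYPE="redis"
export RETRIEVER_TYPE="redis"
export RERANK_TYPE="tei"
export LOGFLAG=true
export host_ip=$(hostname -I | awk '{print $1}')  # assumption: the first local IP is reachable by the healthchecks
```
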
@@ -14,7 +14,7 @@ services:
   embedding-tei:
     build:
       context: GenAIComps
-      dockerfile: comps/embeddings/tei/langchain/Dockerfile
+      dockerfile: comps/embeddings/src/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
   retriever-redis:
@@ -26,13 +26,13 @@ services:
   reranking-tei:
     build:
       context: GenAIComps
-      dockerfile: comps/reranks/tei/Dockerfile
+      dockerfile: comps/reranks/src/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
   llm-tgi:
     build:
       context: GenAIComps
-      dockerfile: comps/llms/text-generation/tgi/Dockerfile
+      dockerfile: comps/llms/src/text-generation/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
   dataprep-redis:
@@ -68,15 +68,6 @@ services:
       context: ../../CodeGen/
       dockerfile: ./Dockerfile
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
-  docsum:
-    build:
-      args:
-        http_proxy: ${http_proxy}
-        https_proxy: ${https_proxy}
-        no_proxy: ${no_proxy}
-      context: ../../DocSum/
-      dockerfile: ./Dockerfile
-    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
   faqgen:
     build:
       args:
@@ -92,9 +83,3 @@ services:
       dockerfile: comps/llms/faq-generation/tgi/langchain/Dockerfile
     extends: faqgen
     image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
-  llm-docsum-tgi:
-    build:
-      context: GenAIComps
-      dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
-    extends: docsum
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}

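
These build definitions are consumed by `docker compose build` rather than plain `docker build`. Assuming this is the usual `docker_image_build/build.yaml` layout used across GenAIExamples (the directory and log file names here are illustrative), only the services whose Dockerfile paths moved need to be rebuilt:

```bash
cd docker_image_build
# Rebuild just the services affected by the GenAIComps refactor
service_list="embedding-tei reranking-tei llm-tgi"
docker compose -f build.yaml build --no-cache ${service_list} > docker_image_build.log
```
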
@@ -30,9 +30,12 @@ function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/

     export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+    export RERANK_TYPE="tei"
     export RERANK_MODEL_ID="BAAI/bge-reranker-base"
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export LLM_MODEL_ID_CODEGEN="Intel/neural-chat-7b-v3-3"
+    export DATAPREP_TYPE="redis"
+    export RETRIEVER_TYPE="redis"
     export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
     export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
     export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
@@ -57,9 +60,8 @@ function start_services() {
     export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${ip_address}:8889/v1/faqgen"
-    export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file"
     export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${ip_address}:7778/v1/codegen"
     export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${ip_address}:8890/v1/docsum"
-    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
-    export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file"
+    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/ingest"
+    export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get"
     export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
     export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
     export CHAT_HISTORY_DELETE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/delete"
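
The dataprep paths changed with the refactor: ingestion now lives under `/v1/dataprep/ingest` and the file listing under `/v1/dataprep/get`. A manual check against a running stack mirrors what the test below does; the `files` form-field name follows the dataprep examples in GenAIComps and is an assumption here, since the upload line itself is outside this hunk.

```bash
# Ingest a small text file through the new dataprep route
echo "OPEA dataprep smoke test." > /tmp/dataprep_file.txt
curl -X POST -F "files=@/tmp/dataprep_file.txt" "http://${ip_address}:6007/v1/dataprep/ingest"

# List the files the dataprep service currently knows about
curl -X POST "http://${ip_address}:6008/v1/dataprep/get"
```
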
@@ -73,11 +75,12 @@ function start_services() {
     export COLLECTION_NAME="Conversations"
     export LLM_SERVICE_HOST_PORT_FAQGEN=9002
     export LLM_SERVICE_HOST_PORT_CODEGEN=9001
-    export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+    export RERANK_SERVER_PORT=8808
+    export EMBEDDING_SERVER_PORT=6006
+    export LLM_SERVER_PORT=9009
     export PROMPT_COLLECTION_NAME="prompt"
     export host_ip=${ip_address}
     export LOGFLAG=true

     # Start Docker Containers
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -101,6 +104,8 @@ function start_services() {
         sleep 5s
         n=$((n+1))
     done
+
+    sleep 10s
 }

 function validate_service() {
@@ -119,9 +124,9 @@ function validate_service() {
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
     elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
-    elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
-        local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
-        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
+    elif [[ $SERVICE_NAME == *"faqgen-xeon-backend-server"* ]]; then
+        local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -F "max_tokens=32" -F "stream=False" -H 'Content-Type: multipart/form-data' "$URL")
     else
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
     fi
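
As the `faqgen-xeon-backend-server` branch above shows, the FaqGen backend is now exercised with a multipart form rather than a JSON body. The same request can be issued by hand against the megaservice endpoint exported earlier (port 8889); the message text is just the sample used by the test.

```bash
curl -X POST "http://${ip_address}:8889/v1/faqgen" \
  -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models." \
  -F "max_tokens=32" \
  -F "stream=False"
```
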
@@ -162,13 +167,20 @@ function validate_microservices() {
     # embedding microservice
     validate_service \
         "${ip_address}:6000/v1/embeddings" \
-        '"text":"What is Deep Learning?","embedding":[' \
+        '"embedding":[' \
         "embedding-microservice" \
         "embedding-tei-server" \
-        '{"text":"What is Deep Learning?"}'
+        '{"input":"What is Deep Learning?"}'

     sleep 1m # retrieval can't curl as expected, try to wait for more time

+    # test /v1/dataprep/delete_file
+    validate_service \
+        "http://${ip_address}:6007/v1/dataprep/delete_file" \
+        '{"status":true}' \
+        "dataprep_del" \
+        "dataprep-redis-server"
+
     # test /v1/dataprep upload file
     echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
     validate_service \
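
The embedding microservice now expects the OpenAI-style `input` field instead of `text`, and the test only asserts on the `"embedding":[` fragment of the response. Probing the microservice on port 6000 directly therefore looks like this:

```bash
curl -X POST "http://${ip_address}:6000/v1/embeddings" \
  -H 'Content-Type: application/json' \
  -d '{"input":"What is Deep Learning?"}'
```
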
@@ -191,13 +203,6 @@ function validate_microservices() {
         "dataprep_get" \
         "dataprep-redis-server"

-    # test /v1/dataprep/delete_file
-    validate_service \
-        "http://${ip_address}:6007/v1/dataprep/delete_file" \
-        '{"status":true}' \
-        "dataprep_del" \
-        "dataprep-redis-server"
-
     # retrieval microservice
     test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
     validate_service \
@@ -247,14 +252,6 @@ function validate_microservices() {
         "llm-faqgen-server" \
         '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

-    # Docsum llm microservice
-    validate_service \
-        "${ip_address}:9003/v1/chat/docsum" \
-        "data: " \
-        "llm_docsum" \
-        "llm-docsum-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
-
     # CodeGen llm microservice
     validate_service \
         "${ip_address}:9001/v1/chat/completions" \
@@ -309,7 +306,6 @@ function validate_megaservice() {
         "chatqna-xeon-backend-server" \
         '{"messages": "What is the revenue of Nike in 2023?"}'\

-
     # Curl the FAQGen Service
     validate_service \
         "${ip_address}:8889/v1/faqgen" \
@@ -318,15 +314,6 @@ function validate_megaservice() {
         "faqgen-xeon-backend-server" \
         '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'\

-    # Curl the DocSum Mega Service
-    validate_service \
-        "${ip_address}:8890/v1/docsum" \
-        "embedding" \
-        "docsum-xeon-backend-server" \
-        "docsum-xeon-backend-server" \
-        '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
-
-
     # Curl the CodeGen Mega Service
     validate_service \
         "${ip_address}:7778/v1/codegen" \