Compare commits: v1.0...AISEprofil (23 commits)

Commit SHAs (author and date columns were not captured in this view):
09fa201b30, 2de1bfc5bb, 75df2c9979, 62e06a0aff, bd32b03e3c, 9d0b49c2d6, 75ce2a3ca6, 99c10933b4, 8bcd82e82d, c1038d2193, 33b9d4e421, c9553c6f9a, 3e796ba73d, 5ed776709d, 954a22051b, 6f4b00f829, 3fb60608b3, c35fe0b429, 28f5e4a268, d55a33dda1, daf2a4fad7, 3ce395582b, 7eaab93d0b
.github/workflows/_example-workflow.yml (vendored, 13 changes)
```diff
@@ -46,33 +46,30 @@ jobs:
       - name: Clean Up Working Directory
         run: sudo rm -rf ${{github.workspace}}/*

-      - name: Get checkout ref
+      - name: Get Checkout Ref
         run: |
           if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
             echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
           else
             echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
           fi
           echo "checkout ref ${{ env.CHECKOUT_REF }}"

-      - name: Checkout out Repo
+      - name: Checkout out GenAIExamples
         uses: actions/checkout@v4
         with:
           ref: ${{ env.CHECKOUT_REF }}
           fetch-depth: 0

-      - name: Clone required Repo
+      - name: Clone Required Repo
         run: |
           cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
           docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
           if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
             git clone https://github.com/huggingface/tei-gaudi.git
           fi
           if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
             git clone https://github.com/vllm-project/vllm.git
             cd vllm && git rev-parse HEAD && cd ../
           fi
           git clone https://github.com/opea-project/GenAIComps.git
-          cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
+          cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../

       - name: Build Image
         if: ${{ fromJSON(inputs.build) }}
```
.github/workflows/manual-docker-publish.yml (vendored, 16 changes)
```diff
@@ -11,23 +11,23 @@ on:
         required: true
         type: string
       examples:
-        default: "Translation"
-        description: 'List of examples to publish [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
+        default: ""
+        description: 'List of examples to publish [AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA]'
         required: false
         type: string
       images:
-        default: "gmcmanager,gmcrouter"
-        description: 'List of images to publish [gmcmanager,gmcrouter, ...]'
+        default: ""
+        description: 'List of images to publish [gmcmanager,gmcrouter]'
         required: false
         type: string
       tag:
-        default: "v0.9"
-        description: "Tag to publish"
+        default: "rc"
+        description: "Tag to publish, like [1.0rc]"
         required: true
         type: string
       publish_tags:
-        default: "latest,v0.9"
-        description: 'Tag list apply to publish images'
+        default: "latest,1.x"
+        description: "Tag list apply to publish images, like [latest,1.0]"
         required: false
         type: string
```
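With the defaults now empty, each dispatch must spell out its inputs. As a minimal sketch, the workflow can be triggered with the GitHub CLI like this (input values are illustrative, and any required inputs defined outside this hunk must be supplied as well):

```bash
# Dispatch manual-docker-publish with explicit inputs (example values only)
gh workflow run manual-docker-publish.yml \
  -f examples="ChatQnA,Translation" \
  -f tag="1.0rc" \
  -f publish_tags="latest,1.0"
```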
.github/workflows/manual-docker-scan.yml (vendored, 8 changes)
```diff
@@ -11,13 +11,13 @@ on:
         required: true
         type: string
       examples:
-        default: "ChatQnA"
-        description: 'List of examples to scan [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
+        default: ""
+        description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
         required: false
         type: string
       images:
-        default: "gmcmanager,gmcrouter"
-        description: 'List of images to scan [gmcmanager,gmcrouter, ...]'
+        default: ""
+        description: 'List of images to publish "gmcmanager,gmcrouter"'
         required: false
         type: string
       tag:
```
.github/workflows/pr-manifest-validate.yml (vendored, deleted — 54 lines)

```diff
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-name: Manifests Validate
-
-on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
-    paths:
-      - "**/kubernetes/manifests/**"
-      - .github/workflows/manifest-validate.yml
-  workflow_dispatch:
-
-# If there is a new commit, the previous jobs will be canceled
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-env:
-  MANIFEST_DIR: "manifests"
-
-jobs:
-  manifests-validate:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout out Repo
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: changed files
-        id: changed_files
-        run: |
-          set -xe
-          changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
-            grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
-          echo "changed_folder: $changed_folder"
-          if [ -z "$changed_folder" ]; then
-            echo "No changes in manifests folder"
-            echo "SKIP=true" >> $GITHUB_OUTPUT
-            exit 0
-          fi
-          echo "SKIP=false" >> $GITHUB_OUTPUT
-          for folder in $changed_folder; do
-            folder_str="$folder_str $folder/kubernetes/manifests/"
-          done
-          echo "folder_str=$folder_str"
-          echo "folder_str=$folder_str" >> $GITHUB_ENV
-
-      - uses: docker://ghcr.io/yannh/kubeconform:latest
-        if: steps.changed_files.outputs.SKIP == 'false'
-        with:
-          args: "-summary -output json ${{env.folder_str}}"
```
.github/workflows/push-image-build.yml (vendored, 1 change)
```diff
@@ -9,7 +9,6 @@ on:
     paths:
       - "**.py"
       - "**Dockerfile"
   workflow_dispatch:

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-on-push
```
```diff
@@ -18,8 +18,6 @@ repos:
           SearchQnA/ui/svelte/tsconfig.json|
           DocSum/ui/svelte/tsconfig.json
         )$
     - id: check-yaml
       args: [--allow-multiple-documents]
     - id: debug-statements
     - id: requirements-txt-fixer
     - id: trailing-whitespace
```
```diff
@@ -103,4 +103,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app

 ## How to register your own tools with agent

-You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain#5-customize-agent-strategy).
+You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
```
```diff
@@ -3,7 +3,7 @@

 services:
   tgi-server:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
     container_name: tgi-server
     ports:
       - "8085:80"
```
```diff
@@ -13,12 +13,16 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       PT_HPU_ENABLE_LAZY_COLLECTIVES: true
       ENABLE_HPU_GRAPH: true
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     runtime: habana
     cap_add:
       - SYS_NICE
```
````diff
@@ -14,7 +14,7 @@ We evaluate the WER (Word Error Rate) metric of the ASR microservice.

 ### Launch ASR microservice

-Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).
+Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).

 ```bash
 git clone https://github.com/opea-project/GenAIComps
````
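For reference, WER is the number of substitutions, deletions, and insertions needed to turn the hypothesis transcript into the reference, normalized by the reference length; the standard definition (independent of the evaluation harness used here) is

$$\mathrm{WER} = \frac{S + D + I}{N}$$

where $S$, $D$, and $I$ count substituted, deleted, and inserted words and $N$ is the number of words in the reference transcript.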
```diff
@@ -51,7 +51,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -61,11 +61,15 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       ENABLE_HPU_GRAPH: true
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     runtime: habana
     cap_add:
       - SYS_NICE
```
```diff
@@ -4,7 +4,7 @@ This document outlines the deployment process for an AudioQnA application utilizi

 The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in a public or private cloud elsewhere.

-Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon we will publish images to Docker Hub, at which point no builds will be required, simplifying install.
+Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). Soon we will publish images to Docker Hub, at which point no builds will be required, simplifying install.

 The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them, and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
```
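A minimal sketch of that flow, assuming a hypothetical CR file name and that GMC is already installed (the exact resource kind exposed by GMC varies; check `kubectl api-resources` on your cluster):

```bash
kubectl apply -f audioqna.yaml        # hypothetical AudioQnA CR file
kubectl get pods                      # asr, tts, and llm pods appear as GMC starts them
kubectl api-resources | grep -i gmc   # locate the GMC custom resource kind
```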
```diff
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment

 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
 For Gaudi:

-- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
 - whisper-gaudi: opea/whisper-gaudi:latest
 - speecht5-gaudi: opea/speecht5-gaudi:latest
```
```diff
@@ -247,7 +247,7 @@ spec:
       - envFrom:
           - configMapRef:
               name: audio-qna-config
-        image: ghcr.io/huggingface/text-generation-inference:2.2.0
+        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
```
```diff
@@ -271,7 +271,7 @@ spec:
       - envFrom:
           - configMapRef:
               name: audio-qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
```
```diff
@@ -303,6 +303,14 @@ spec:
           value: none
         - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
           value: 'true'
+        - name: ENABLE_HPU_GRAPH
+          value: 'true'
+        - name: LIMIT_HPU_GRAPH
+          value: 'true'
+        - name: USE_FLASH_ATTENTION
+          value: 'true'
+        - name: FLASH_ATTENTION_RECOMPUTE
+          value: 'true'
         - name: runtime
           value: habana
         - name: HABANA_VISIBLE_DEVICES
```
```diff
@@ -315,7 +323,7 @@ spec:
       volumes:
         - name: model-volume
           hostPath:
-            path: /home/sdp/cesg
+            path: /mnt/models
             type: Directory
         - name: shm
           emptyDir:
```
```diff
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
     docker images && sleep 1s
 }
```
```diff
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper asr llm-tgi speecht5 tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
     docker images && sleep 1s
 }
```
```diff
@@ -16,6 +16,8 @@ Quick Start Deployment Steps:
 2. Run Docker Compose.
 3. Consume the ChatQnA Service.

+Note: If you do not have docker installed you can run this script to install docker : `bash docker_compose/install_docker.sh`
+
 ### Quick Start: 1.Setup Environment Variable

 To set up environment variables for deploying ChatQnA services, follow these steps:
```
```diff
@@ -74,7 +76,7 @@ In following cases, you could build docker image from source by yourself.

 - Failed to download the docker image.

-- Use the latest or special version.
+- If you want to use a specific version of Docker image.

 Please refer to the 'Build Docker Images' in [Guide](docker_compose/intel/cpu/xeon/README.md).
```
```diff
@@ -240,7 +242,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o

 Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.

-Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
+Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.

 ### Deploy ChatQnA on AI PC
```
````diff
@@ -306,7 +308,7 @@ Two ways of consuming ChatQnA Service:

 ## Troubleshooting

-1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
+1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:

 ```bash
 http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'
````
```diff
@@ -29,6 +29,8 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c

 ## Getting Started

+We recommend using Kubernetes to deploy the ChatQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. Below is a description of Kubernetes deployment and benchmarking. For instructions on deploying and benchmarking with Docker, please refer to [this section](#benchmark-with-docker).
+
 ### Prerequisites

 - Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
```
````diff
@@ -88,7 +90,7 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL

 ### Benchmark tool preparation

-The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to do the performance test. We need to set up the benchmark tool at the master node of Kubernetes, which is k8s-master.
+The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/README.md) to do the performance test. We need to set up the benchmark tool at the master node of Kubernetes, which is k8s-master.

 ```bash
 # on k8s-master node
````
````diff
@@ -187,10 +189,13 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \

 ###### 3.2 Run Benchmark Test

-We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
+We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.

 ```bash
-export USER_QUERIES="[4, 8, 16, 640]"
+export DEPLOYMENT_TYPE="k8s"
+export SERVICE_IP=None
+export SERVICE_PORT=None
+export USER_QUERIES="[640, 640, 640, 640]"
 export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
 envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
 ```
````
````diff
@@ -237,20 +242,22 @@ kubectl apply -f .

 ##### 3. Run tests

-We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
+We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.

 ```bash
-export USER_QUERIES="[4, 8, 16, 1280]"
+export DEPLOYMENT_TYPE="k8s"
+export SERVICE_IP=None
+export SERVICE_PORT=None
+export USER_QUERIES="[1280, 1280, 1280, 1280]"
 export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
 envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
 ```

 And then run the benchmark tool by:

 ```bash
 cd GenAIEval/evals/benchmark
 python benchmark.py
 ```

 ##### 4. Data collection
````
````diff
@@ -286,10 +293,13 @@ kubectl apply -f .

 ##### 3. Run tests

-We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
+We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.

 ```bash
-export USER_QUERIES="[4, 8, 16, 2560]"
+export DEPLOYMENT_TYPE="k8s"
+export SERVICE_IP=None
+export SERVICE_PORT=None
+export USER_QUERIES="[2560, 2560, 2560, 2560]"
 export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
 envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
 ```
````
````diff
@@ -313,3 +323,80 @@ cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
 kubectl delete -f .
 kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
 ```
+
+## Benchmark with Docker
+
+### Deploy ChatQnA service with Docker
+
+In order to set up the environment correctly, you'll need to configure essential environment variables and, if applicable, proxy-related variables.
+
+```bash
+# Example: host_ip="192.168.1.1"
+export host_ip="External_Public_IP"
+# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+export no_proxy="Your_No_Proxy"
+export http_proxy="Your_HTTP_Proxy"
+export https_proxy="Your_HTTPs_Proxy"
+export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+```
+
+#### Deploy ChatQnA on Gaudi
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
+docker compose up -d
+```
+
+Refer to the [Gaudi Guide](../../docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
+
+#### Deploy ChatQnA on Xeon
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose up -d
+```
+
+Refer to the [Xeon Guide](../../docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
+
+#### Deploy ChatQnA on NVIDIA GPU
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/nvidia/gpu/
+docker compose up -d
+```
+
+Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for more instructions on building docker images from source.
+
+### Run tests
+
+We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
+
+```bash
+export DEPLOYMENT_TYPE="docker"
+export SERVICE_IP="ChatQnA Service IP"
+export SERVICE_PORT="ChatQnA Service Port"
+export USER_QUERIES="[640, 640, 640, 640]"
+export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
+envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
+```
+
+And then run the benchmark tool by:
+
+```bash
+cd GenAIEval/evals/benchmark
+python benchmark.py
+```
+
+### Data collection
+
+All the test results will be written to the folder `/home/sdp/benchmark_output/docker` configured by the environment variable `TEST_OUTPUT_DIR` in the previous steps.
+
+### Clean up
+
+Taking Gaudi as an example, use the commands below to clean up the system.
+
+```bash
+cd GenAIExamples/docker_compose/intel/hpu/gaudi
+docker compose stop && docker compose rm -f
+echo y | docker system prune
+```
````
```diff
@@ -3,6 +3,9 @@

 test_suite_config: # Overall configuration settings for the test suite
   examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
+  deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
+  service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
+  service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
   concurrent_level: 5 # The concurrency level, adjustable based on requirements
   user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
   random_prompt: false # Use random prompts if true, fixed prompts if false
```
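The three new keys are what switch the benchmark between its two targets; a sketch of the intended settings side by side (the Docker endpoint values are examples only):

```bash
# Kubernetes target: the tool discovers the service in-cluster, so IP/port stay unset
export DEPLOYMENT_TYPE="k8s"
export SERVICE_IP=None
export SERVICE_PORT=None

# Docker target: point the tool at the published ChatQnA endpoint
export DEPLOYMENT_TYPE="docker"
export SERVICE_IP="192.168.1.1"   # example address
export SERVICE_PORT="8888"        # example port
```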
ChatQnA/benchmark/performance/helm_charts/.helmignore (new file, 23 lines)

```text
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
```
ChatQnA/benchmark/performance/helm_charts/Chart.yaml (new file, 27 lines)

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v2
name: chatqna-charts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
```
ChatQnA/benchmark/performance/helm_charts/README.md (new file, 38 lines)

````markdown
# ChatQnA Deployment

This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.

## Getting Started

### Preparation

```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts

# Replace <your token> with your actual Hugging Face token and run the following command:
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;

# Replace the following placeholders with the desired model IDs:
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```

### ChatQnA Installation

```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml

# Tips: To display rendered manifests according to the given yaml.
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
```

Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
````
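Beyond `install` and `template`, the standard Helm verbs apply to the release created above; for example, to switch configurations or tear the release down (file and release names as used in this README):

```bash
helm upgrade chatqna helm_charts/ -f helm_charts/tuned_single_node.yaml
helm uninstall chatqna
```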
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml (new file, 237 lines)

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

config:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
  INDEX_NAME: rag-redis
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  LLM_SERVER_HOST_IP: llm-dependency-svc
  NODE_SELECTOR: chatqna-opea
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009

deployments:
  - name: chatqna-backend-server-deploy
    spec:
      image_name: opea/chatqna-no-wrapper
      image_tag: latest
      replicas: 1
      ports:
        - containerPort: 8888

  - name: dataprep-deploy
    spec:
      image_name: opea/dataprep-redis
      image_tag: latest
      replicas: 1
      ports:
        - containerPort: 6007

  - name: vector-db
    spec:
      image_name: redis/redis-stack
      image_tag: 7.2.0-v9
      replicas: 1
      ports:
        - containerPort: 6379
        - containerPort: 8001

  - name: retriever-deploy
    spec:
      image_name: opea/retriever-redis
      image_tag: latest
      replicas: 1
      ports:
        - containerPort: 7000

  - name: embedding-dependency-deploy
    spec:
      image_name: ghcr.io/huggingface/text-embeddings-inference
      image_tag: cpu-1.5
      replicas: 1
      ports:
        - containerPort: 80
      args:
        - name: "--model-id"
          value: $(EMBEDDING_MODEL_ID)
        - name: "--auto-truncate"
      volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm

  - name: reranking-dependency-deploy
    spec:
      image_name: opea/tei-gaudi
      image_tag: latest
      replicas: 1
      resources:
        limits:
          habana.ai/gaudi: 1
      args:
        - name: "--model-id"
        - value: $(RERANK_MODEL_ID)
        - name: "--auto-truncate"
      env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: "true"
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        - name: HF_TOKEN
          value: ${HF_TOKEN}
        - name: MAX_WARMUP_SEQUENCE_LENGTH
          value: "512"
      volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm

  - name: llm-dependency-deploy
    spec:
      image_name: ghcr.io/huggingface/tgi-gaudi
      image_tag: 2.0.4
      replicas: 7
      ports:
        - containerPort: 80
      resources:
        limits:
          habana.ai/gaudi: 1
      args:
        - name: "--model-id"
          value: $(LLM_MODEL_ID)
        - name: "--max-input-length"
          value: "2048"
        - name: "--max-total-tokens"
          value: "4096"
      env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: "true"
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        - name: HF_TOKEN
          value: ${HF_TOKEN}
      volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm

services:
  - name: chatqna-backend-server-svc
    spec:
      ports:
        - name: service
          nodePort: 30888
          port: 8888
          targetPort: 8888
      selector:
        app: chatqna-backend-server-deploy
      type: NodePort

  - name: dataprep-svc
    spec:
      ports:
        - name: port1
          port: 6007
          targetPort: 6007
      selector:
        app: dataprep-deploy
      type: ClusterIP

  - name: embedding-dependency-svc
    spec:
      ports:
        - name: service
          port: 6006
          targetPort: 80
      selector:
        app: embedding-dependency-deploy
      type: ClusterIP

  - name: llm-dependency-svc
    spec:
      ports:
        - name: service
          port: 9009
          targetPort: 80
      selector:
        app: llm-dependency-deploy
      type: ClusterIP

  - name: reranking-dependency-svc
    spec:
      ports:
        - name: service
          port: 8808
          targetPort: 80
      selector:
        app: reranking-dependency-deploy
      type: ClusterIP

  - name: retriever-svc
    spec:
      ports:
        - name: service
          port: 7000
          targetPort: 7000
      selector:
        app: retriever-deploy
      type: ClusterIP

  - name: vector-db
    spec:
      ports:
        - name: vector-db-service
          port: 6379
          targetPort: 6379
        - name: vector-db-insight
          port: 8001
          targetPort: 8001
      selector:
        app: vector-db
      type: ClusterIP
```
New Helm template, ConfigMap (file path not captured in this view; 25 added lines)

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
  EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }}
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
  INDEX_NAME: {{ .Values.config.INDEX_NAME }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
  LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
  REDIS_URL: {{ .Values.config.REDIS_URL }}
  RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
  RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }}
  RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }}
  TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }}
  TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }}
  TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }}
  TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }}
---
```
New Helm template, Deployment (file path not captured in this view; 108 added lines)

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

{{- range $deployment := .Values.deployments }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $deployment.name }}
  namespace: default
spec:
  replicas: {{ $deployment.spec.replicas }}
  selector:
    matchLabels:
      app: {{ $deployment.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $deployment.name }}
    spec:
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          {{- if $deployment.spec.args }}
          args:
          {{- range $arg := $deployment.spec.args }}
          {{- if $arg.name }}
            - {{ $arg.name }}
          {{- end }}
          {{- if $arg.value }}
            - "{{ $arg.value }}"
          {{- end }}
          {{- end }}
          {{- end }}

          {{- if $deployment.spec.env }}
          env:
          {{- range $env := $deployment.spec.env }}
            - name: {{ $env.name }}
              value: "{{ $env.value }}"
          {{- end }}
          {{- end }}

          image: {{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }}
          imagePullPolicy: IfNotPresent
          name: {{ $deployment.name }}

          {{- if $deployment.spec.ports }}
          ports:
          {{- range $port := $deployment.spec.ports }}
          {{- range $port_name, $port_id := $port }}
            - {{ $port_name }}: {{ $port_id }}
          {{- end }}
          {{- end }}
          {{- end }}

          {{- if $deployment.spec.resources }}
          resources:
          {{- range $resourceType, $resource := $deployment.spec.resources }}
            {{ $resourceType }}:
            {{- range $limitType, $limit := $resource }}
              {{ $limitType }}: {{ $limit }}
            {{- end }}
          {{- end }}
          {{- end }}

          {{- if $deployment.spec.volumeMounts }}
          volumeMounts:
          {{- range $volumeMount := $deployment.spec.volumeMounts }}
            - mountPath: {{ $volumeMount.mountPath }}
              name: {{ $volumeMount.name }}
          {{- end }}
          {{- end }}

      hostIPC: true
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: {{ $deployment.name }}
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway

      {{- if $deployment.spec.volumes }}
      volumes:
      {{- range $index, $volume := $deployment.spec.volumes }}
        - name: {{ $volume.name }}
        {{- if $volume.hostPath }}
          hostPath:
            path: {{ $volume.hostPath.path }}
            type: {{ $volume.hostPath.type }}
        {{- else if $volume.emptyDir }}
          emptyDir:
            medium: {{ $volume.emptyDir.medium }}
            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
        {{- end }}
      {{- end }}
      {{- end }}

---
{{- end }}
```
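To see what a single `deployments` entry renders to without installing anything, `helm template` (already shown in the README above) can be combined with a filter; a sketch:

```bash
# Render the chart and inspect the llm deployment (names from the values files)
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml \
  | grep -A 20 'name: llm-dependency-deploy'
```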
New Helm template, Service (file path not captured in this view; 24 added lines)

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

{{- range $service := .Values.services }}
apiVersion: v1
kind: Service
metadata:
  name: {{ $service.name }}
  namespace: default
spec:
  ports:
  {{- range $port := $service.spec.ports }}
    - name: {{ $port.name }}
    {{- range $port_name, $port_id := $port }}
    {{- if ne $port_name "name" }}
      {{ $port_name }}: {{ $port_id }}
    {{- end }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $service.spec.selector.app }}
  type: {{ $service.spec.type }}
---
{{- end }}
```
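Since the inner loop emits every port key except `name`, the Services produced by this template mirror the values files exactly; after an install they can be verified directly (service names from the values files above):

```bash
kubectl get svc chatqna-backend-server-svc dataprep-svc embedding-dependency-svc \
  llm-dependency-svc reranking-dependency-svc retriever-svc vector-db
```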
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml (new file, 259 lines)

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

config:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
  INDEX_NAME: rag-redis
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  LLM_SERVER_HOST_IP: llm-dependency-svc
  NODE_SELECTOR: chatqna-opea
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009

deployments:
  - name: chatqna-backend-server-deploy
    spec:
      image_name: opea/chatqna-no-wrapper
      image_tag: latest
      replicas: 2
      ports:
        - containerPort: 8888
      resources:
        limits:
          cpu: "8"
          memory: "8000Mi"
        requests:
          cpu: "8"
          memory: "8000Mi"

  - name: dataprep-deploy
    spec:
      image_name: opea/dataprep-redis
      image_tag: latest
      replicas: 1
      ports:
        - containerPort: 6007

  - name: vector-db
    spec:
      image_name: redis/redis-stack
      image_tag: 7.2.0-v9
      replicas: 1
      ports:
        - containerPort: 6379
        - containerPort: 8001

  - name: retriever-deploy
    spec:
      image_name: opea/retriever-redis
      image_tag: latest
      replicas: 2
      ports:
        - containerPort: 7000
      resources:
        requests:
          cpu: "4"
          memory: "4000Mi"

  - name: embedding-dependency-deploy
    spec:
      image_name: ghcr.io/huggingface/text-embeddings-inference
      image_tag: cpu-1.5
      replicas: 1
      ports:
        - containerPort: 80
      args:
        - name: "--model-id"
          value: $(EMBEDDING_MODEL_ID)
        - name: "--auto-truncate"
      resources:
        limits:
          cpu: "80"
          memory: "20000Mi"
        requests:
          cpu: "80"
          memory: "20000Mi"
      volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm

  - name: reranking-dependency-deploy
    spec:
      image_name: opea/tei-gaudi
      image_tag: latest
      replicas: 1
      resources:
        limits:
          habana.ai/gaudi: 1
      args:
        - name: "--model-id"
        - value: $(RERANK_MODEL_ID)
        - name: "--auto-truncate"
      env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: "true"
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        - name: HF_TOKEN
          value: ${HF_TOKEN}
        - name: MAX_WARMUP_SEQUENCE_LENGTH
          value: "512"
      volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm

  - name: llm-dependency-deploy
    spec:
      image_name: ghcr.io/huggingface/tgi-gaudi
      image_tag: 2.0.4
      replicas: 7
      ports:
        - containerPort: 80
      resources:
        limits:
          habana.ai/gaudi: 1
      args:
        - name: "--model-id"
          value: $(LLM_MODEL_ID)
        - name: "--max-input-length"
          value: "1280"
        - name: "--max-total-tokens"
          value: "2048"
        - name: "--max-batch-total-tokens"
          value: "65536"
        - name: "--max-batch-prefill-tokens"
          value: "4096"
      env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: "true"
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        - name: HF_TOKEN
          value: ${HF_TOKEN}
      volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm

services:
  - name: chatqna-backend-server-svc
    spec:
      ports:
        - name: service
          nodePort: 30888
          port: 8888
          targetPort: 8888
      selector:
        app: chatqna-backend-server-deploy
      type: NodePort

  - name: dataprep-svc
    spec:
      ports:
        - name: port1
          port: 6007
          targetPort: 6007
      selector:
        app: dataprep-deploy
      type: ClusterIP

  - name: embedding-dependency-svc
    spec:
      ports:
        - name: service
          port: 6006
          targetPort: 80
      selector:
        app: embedding-dependency-deploy
      type: ClusterIP

  - name: llm-dependency-svc
    spec:
      ports:
        - name: service
          port: 9009
          targetPort: 80
      selector:
        app: llm-dependency-deploy
      type: ClusterIP

  - name: reranking-dependency-svc
    spec:
      ports:
        - name: service
          port: 8808
          targetPort: 80
      selector:
        app: reranking-dependency-deploy
      type: ClusterIP

  - name: retriever-svc
    spec:
      ports:
        - name: service
          port: 7000
          targetPort: 7000
      selector:
        app: retriever-deploy
      type: ClusterIP

  - name: vector-db
    spec:
      ports:
        - name: vector-db-service
          port: 6379
          targetPort: 6379
        - name: vector-db-insight
          port: 8001
          targetPort: 8001
      selector:
        app: vector-db
      type: ClusterIP
```
ChatQnA/benchmark/performance/helm_charts/values.yaml (new file, 237 lines; its contents are identical to oob_single_node.yaml above)
The following hunk appears, unchanged, in eight consecutive benchmark manifest files (the file paths were not captured in this view):

```diff
@@ -237,7 +237,7 @@ spec:
         envFrom:
           - configMapRef:
               name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:
```
The following pair of hunks likewise appears, unchanged, in eight consecutive benchmark manifest files (the file paths were not captured in this view):

```diff
@@ -167,10 +167,10 @@ spec:
           - containerPort: 80
         resources:
           limits:
-            cpu: 76
+            cpu: 80
             memory: 20000Mi
           requests:
-            cpu: 76
+            cpu: 80
             memory: 20000Mi
         volumeMounts:
           - mountPath: /data
@@ -255,7 +255,7 @@ spec:
         envFrom:
           - configMapRef:
               name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:
```
@@ -19,7 +19,7 @@ opea_micro_services:
|
||||
tei-embedding-service:
|
||||
host: ${TEI_EMBEDDING_SERVICE_IP}
|
||||
ports: ${TEI_EMBEDDING_SERVICE_PORT}
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -48,7 +48,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -56,10 +56,13 @@ opea_micro_services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
|
||||
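The same `tgi-gaudi:2.0.4` to `2.0.5` image bump recurs in each of the Kubernetes manifests above. On a cluster that already runs one of these manifests, a rolling image update is a lighter-weight alternative to re-applying the whole file. The sketch below is a hedged example: it assumes the Deployment is named `llm-dependency-deploy`, matching the container name shown in the hunks, which may not hold for every manifest.

```bash
# Hedged sketch: bump only the TGI image on a live deployment.
# Deployment/container names are assumptions taken from the hunks above.
kubectl set image deployment/llm-dependency-deploy \
  llm-dependency-deploy=ghcr.io/huggingface/tgi-gaudi:2.0.5
kubectl rollout status deployment/llm-dependency-deploy
```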
35
ChatQnA/docker_compose/install_docker.sh
Normal file
@@ -0,0 +1,35 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Update the package index
sudo apt-get -y update

# Install prerequisites
sudo apt-get -y install ca-certificates curl

# Create the directory for the Docker GPG key
sudo install -m 0755 -d /etc/apt/keyrings

# Add Docker's official GPG key
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc

# Set permissions for the GPG key
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add Docker repository to the sources list
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

# Update the package index with Docker packages
sudo apt-get -y update

# Install Docker packages
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

# Add the current user to the docker group
sudo usermod -aG docker $USER

# Optional: Verify that Docker is installed correctly
sudo docker --version
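A typical way to use this new helper script is sketched below; note that the `usermod` group change only takes effect in a new login session (or via `newgrp`).

```bash
# Run the installer, then verify Docker works without sudo.
# newgrp starts a subshell with the new docker group membership applied.
bash ChatQnA/docker_compose/install_docker.sh
newgrp docker
docker run --rm hello-world
```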
@@ -11,36 +11,84 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

If you are in a proxy environment, set the proxy-related environment variables:

export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPS_Proxy"

### 1. Build Embedding Image

```bash
docker build --no-cache -t opea/embedding-tei:latest -f comps/embeddings/tei/langchain/Dockerfile .
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```

### 2. Build Retriever Image

```bash
docker build --no-cache -t opea/retriever-redis:latest -f comps/retrievers/redis/langchain/Dockerfile .
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```

### 3. Build Rerank Image

```bash
docker build --no-cache -t opea/reranking-tei:latest -f comps/reranks/tei/Dockerfile .
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
### 4. Set up Ollama Service and Build LLM Image

We use [Ollama](https://ollama.com/) as our LLM service for AIPC. Please pre-download Ollama on your PC.
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.

Please set up Ollama on your PC by following the instructions below. This sets the entrypoint Ollama needs to suit the ChatQnA examples.

#### 4.1 Set Up Ollama LLM Service

Install the Ollama service with one command:

curl -fsSL https://ollama.com/install.sh | sh

##### Set Ollama Service Configuration

The Ollama service configuration file is /etc/systemd/system/ollama.service. Edit the file to set the OLLAMA_HOST environment variable (replace **${host_ip}** with your host IPv4 address).

```
Environment="OLLAMA_HOST=${host_ip}:11434"
```

##### Set https_proxy environment for Ollama

If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file:

```
Environment="https_proxy=Your_HTTPS_Proxy"
```

##### Restart Ollama Service

```
$ sudo systemctl daemon-reload
$ sudo systemctl restart ollama.service
```

##### Pull LLM model

```
export OLLAMA_HOST=http://${host_ip}:11434
ollama pull llama3
ollama list
NAME            ID              SIZE    MODIFIED
llama3:latest   365c0bd3c000    4.7 GB  5 days ago
```
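Before building the LLM image, it is worth confirming that the Ollama endpoint responds. A minimal check, assuming the `llama3` model pulled above and the `${host_ip}` configured in `ollama.service`:

```bash
# Minimal sanity check against the Ollama REST API
curl http://${host_ip}:11434/api/generate -d '{
  "model": "llama3",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```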
#### 4.2 Build LLM Image

```bash
docker build --no-cache -t opea/llm-ollama:latest -f comps/llms/text-generation/ollama/langchain/Dockerfile .
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```

### 5. Build Dataprep Image

```bash
docker build --no-cache -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/Dockerfile .
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
@@ -61,7 +109,7 @@ Build frontend Docker image via below command:

```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest -f ./docker/Dockerfile .
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../../..
```
@@ -51,7 +51,7 @@ In following cases, you could build docker image from source by yourself.

- Failed to download the docker image.

- Use the latest or special version.
- If you want to use a specific version of the Docker image.

Please refer to 'Build Docker Images' below.
@@ -233,7 +233,7 @@ For users in China who are unable to download models directly from Huggingface,
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="Intel/neural-chat-7b-v3-3"
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id $model_name
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
```

2. Offline

@@ -247,7 +247,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id /data
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
```
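Whichever way the container is started, the endpoint can be smoke-tested once the model has loaded. A minimal request against the 8008 port mapping above, using TGI's standard `/generate` route:

```bash
# Quick smoke test of the tgi-service started above
curl http://localhost:8008/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 32}}'
```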
### Setup Environment Variables
@@ -69,7 +69,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "6041:80"

@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"

@@ -75,7 +75,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "8808:80"
@@ -10,6 +10,7 @@ export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export REDIS_HOST=${host_ip}
@@ -52,7 +52,7 @@ In following cases, you could build docker image from source by yourself.

- Failed to download the docker image.

- Use the latest or special version.
- If you want to use a specific version of the Docker image.

Please refer to 'Build Docker Images' below.
@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"

@@ -108,7 +108,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8005:80"

@@ -118,11 +118,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
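Note that the updated service now reads the Hugging Face token under two names, `HF_TOKEN` and `HUGGING_FACE_HUB_TOKEN`, both filled from the single `HUGGINGFACEHUB_API_TOKEN` shell variable. A minimal bring-up sketch, assuming the compose file is named `compose.yaml` as elsewhere in this directory:

```bash
# Export the token once; the compose file maps it into the container
# as both HF_TOKEN and HUGGING_FACE_HUB_TOKEN.
export HUGGINGFACEHUB_API_TOKEN="hf_xxx"   # your Hugging Face token
docker compose -f compose.yaml up -d tgi-service
docker logs -f tgi-gaudi-server            # container_name from the hunk above
```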
@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tgi-guardrails-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-guardrails-server
ports:
- "8088:80"

@@ -35,11 +35,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE

@@ -60,7 +64,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"

@@ -141,7 +145,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8008:80"

@@ -151,11 +155,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"

@@ -108,7 +108,7 @@ services:
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8005:80"

@@ -118,11 +118,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"

@@ -73,7 +73,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"

@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"

@@ -73,7 +73,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
@@ -25,7 +25,7 @@ services:
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"

@@ -75,7 +75,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8005:80"

@@ -85,11 +85,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
@@ -56,16 +56,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e
2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
174bd43fa6b5 opea/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
```

In this case, `ghcr.io/huggingface/tgi-gaudi:1.2.1` exited.
In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.5` exited.

```
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
```

Next, we can check the container logs to learn what happened during the docker start.
@@ -76,7 +76,7 @@ Check the log of container by:

`docker logs <CONTAINER ID> -t`

View the logs of `ghcr.io/huggingface/tgi-gaudi:1.2.1`
View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5`

`docker logs 05c40b636239 -t`

@@ -105,7 +105,7 @@ So just make sure the devices are available.
Here is another failure example:

```
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
```

Check the log by `docker logs f7a08f9867f9 -t`.

@@ -122,7 +122,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co

```
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:1.2.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8008:80"

@@ -131,9 +131,13 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
@@ -52,7 +52,7 @@ In following cases, you could build docker image from source by yourself.

- Failed to download the docker image.

- Use the latest or special version.
- If you want to use a specific version of the Docker image.

Please refer to 'Build Docker Images' below.
@@ -125,12 +125,6 @@ services:
dockerfile: comps/guardrails/llama_guard/langchain/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/guardrails-tgi:${TAG:-latest}
tei-gaudi:
build:
context: tei-gaudi
dockerfile: Dockerfile-hpu
extends: chatqna
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
vllm:
build:
context: vllm
@@ -13,7 +13,7 @@

## Deploy On Xeon

```
cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifests
cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml
kubectl apply -f chatqna.yaml

@@ -33,7 +33,7 @@ kubectl apply -f chatqna_bf16.yaml

## Deploy On Gaudi

```
cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifests
cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml
kubectl apply -f chatqna.yaml
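After `kubectl apply`, it is worth confirming that the pods come up before exercising the service. A minimal verification sketch (namespace flags omitted; adjust to your cluster):

```bash
# Watch the ChatQnA pods start and check any that get stuck
kubectl get pods -w
kubectl describe pod <pod-name>   # <pod-name> is a placeholder for a Pending/Error pod
```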
@@ -2,9 +2,9 @@

This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon server and Gaudi machines.

The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running elsewhere in a public or private cloud.

Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying install.

The ChatQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks whether the microservices listed in the CR yaml file are running; if not, it starts them, and then proceeds to connect them. When the ChatQnA RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `embedding`, `retriever`, `rerank`, and `llm`.
@@ -27,8 +27,8 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tei-embedding-service: opea/tei-gaudi:latest
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
- tei-embedding-service: ghcr.io/huggingface/tei-gaudi:latest
- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5

> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/hpu/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
@@ -1474,7 +1474,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -1554,7 +1554,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -1294,7 +1294,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
image: "ghcr.io/huggingface/tei-gaudi:latest"
imagePullPolicy: IfNotPresent
args:
- "--auto-truncate"

@@ -1477,7 +1477,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -1558,7 +1558,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -1115,7 +1115,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
image: "ghcr.io/huggingface/tei-gaudi:latest"
imagePullPolicy: IfNotPresent
args:
- "--auto-truncate"

@@ -1298,7 +1298,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/huggingface/tei-gaudi

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-guardrails chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi tei-gaudi guardrails-tgi"
service_list="chatqna-guardrails chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi guardrails-tgi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest

docker images && sleep 1s
}

@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/huggingface/tei-gaudi

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-no-wrapper chatqna-ui dataprep-redis retriever-redis tei-gaudi"
service_list="chatqna-no-wrapper chatqna-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest

docker images && sleep 1s
}

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna-no-wrapper chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

docker images && sleep 1s

@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/huggingface/tei-gaudi

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi tei-gaudi nginx"
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest

docker images && sleep 1s
}

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

docker images && sleep 1s

@@ -17,13 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/huggingface/tei-gaudi

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei tei-gaudi llm-vllm-hpu llm-vllm"
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm-hpu llm-vllm"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest
docker images && sleep 1s
}

@@ -23,7 +23,7 @@ function build_docker_images() {
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm vllm"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

docker images && sleep 1s

@@ -17,13 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/huggingface/tei-gaudi

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei tei-gaudi llm-vllm-ray-hpu llm-vllm-ray"
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm-ray-hpu llm-vllm-ray"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest
docker images && sleep 1s
}

@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/huggingface/tei-gaudi

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui dataprep-redis embedding-tei retriever-redis llm-tgi tei-gaudi"
service_list="chatqna-without-rerank chatqna-ui dataprep-redis embedding-tei retriever-redis llm-tgi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest

docker images && sleep 1s
}

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis embedding-tei retriever-redis llm-tgi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

docker images && sleep 1s
@@ -106,7 +106,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o

Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.

Refer to the [CodeGen helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codegen) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi.
Refer to the [CodeGen helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codegen/README.md) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi.

## Consume CodeGen Service
@@ -128,7 +128,7 @@ Two ways of consuming CodeGen Service:

## Troubleshooting

1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:

```bash
http_proxy=""
@@ -8,7 +8,7 @@ We evaluate accuracy by [bigcode-evaluation-harness](https://github.com/bigcode-

### Launch CodeGen microservice

Please refer to [CodeGen Examples](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen) and follow the guide to deploy the CodeGen megaservice.
Please refer to [CodeGen Examples](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen/README.md) and follow the guide to deploy the CodeGen megaservice.

Use the `curl` command to test the codegen service and ensure that it has started properly
@@ -6,7 +6,7 @@ opea_micro_services:
tgi-service:
host: ${TGI_SERVICE_IP}
ports: ${TGI_SERVICE_PORT}
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
volumes:
- "./data:/data"
runtime: habana

@@ -17,7 +17,11 @@ opea_micro_services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
model-id: ${LLM_MODEL_ID}
llm:
host: ${LLM_SERVICE_HOST_IP}

@@ -3,7 +3,7 @@

services:
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8028:80"

@@ -15,7 +15,11 @@ services:
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
@@ -2,7 +2,7 @@

This document outlines the deployment process for a Code Generation (CodeGen) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.

Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.

If you have only Intel Xeon machines, you could use the codegen_xeon.yaml file; if you have a Gaudi cluster, you could use codegen_gaudi.yaml.
In the example below we illustrate on Xeon.
@@ -405,7 +405,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="codegen codegen-ui llm-tgi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker images && sleep 1s
}
@@ -99,7 +99,7 @@ Refer to the [Code Translation Kubernetes Guide](./kubernetes/intel/README.md)

Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.

Refer to the [CodeTrans helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codetrans) for instructions on deploying CodeTrans into Kubernetes on Xeon & Gaudi.
Refer to the [CodeTrans helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codetrans/README.md) for instructions on deploying CodeTrans into Kubernetes on Xeon & Gaudi.

## Consume Code Translation Service

@@ -121,7 +121,7 @@ By default, the UI runs on port 5173 internally.

## Troubleshooting

1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeTrans/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeTrans/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:

```bash
http_proxy=""
@@ -6,7 +6,7 @@ opea_micro_services:
tgi-service:
host: ${TGI_SERVICE_IP}
ports: ${TGI_SERVICE_PORT}
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
volumes:
- "./data:/data"
runtime: habana

@@ -17,7 +17,11 @@ opea_micro_services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
model-id: ${LLM_MODEL_ID}
llm:
host: ${LLM_SERVICE_HOST_IP}

@@ -3,7 +3,7 @@

services:
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: codetrans-tgi-service
ports:
- "8008:80"

@@ -15,7 +15,11 @@ services:
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
@@ -2,7 +2,7 @@

This document outlines the deployment process for a Code Translation (CodeTran) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.

Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.

If you have only Intel Xeon machines, you could use the codetrans_xeon.yaml file; if you have a Gaudi cluster, you could use codetrans_gaudi.yaml.
In the example below we illustrate on Xeon.
@@ -405,7 +405,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="codetrans codetrans-ui llm-tgi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker images && sleep 1s
}
@@ -1,8 +1,22 @@
# DocRetriever Application

DocRetriever are the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
DocRetriever is the most widely adopted use case for leveraging different methodologies to match a user query against a set of free-text records. DocRetriever is essential to a RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that the responses generated remain factual and current. The core of this architecture is vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.

## We provide DocRetriever with different deployment infra

- [docker xeon version](docker_compose/intel/cpu/xeon/) => minimum endpoints, easy to set up
- [docker gaudi version](docker_compose/intel/hpu/gaudi/) => with extra tei_gaudi endpoint, faster
- [docker xeon version](docker_compose/intel/cpu/xeon/README.md) => minimum endpoints, easy to set up
- [docker gaudi version](docker_compose/intel/hpu/gaudi/README.md) => with extra tei_gaudi endpoint, faster

## We allow users to set retriever/reranker hyperparams via requests

Example usage:

```python
url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port)
payload = {
    "messages": query,
    "k": 5,  # retriever top k
    "top_n": 2,  # reranker top n
}
response = requests.post(url, json=payload)
```
@@ -79,13 +79,26 @@ Retrieval from KnowledgeBase

```bash
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}'

# expected output
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```

**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request; the parameters that can be changed are listed below, and an example request follows the list.

1. retriever
   * search_type: str = "similarity"
   * k: int = 4
   * distance_threshold: Optional[float] = None
   * fetch_k: int = 20
   * lambda_mult: float = 0.5
   * score_threshold: float = 0.2

2. reranker
   * top_n: int = 1
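A request that overrides several of these defaults might look like the sketch below; the values are illustrative and match the Python example earlier in this document.

```bash
# Override retriever k and reranker top_n in a single request
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
  "messages": "Explain the OPEA project?",
  "k": 5,
  "top_n": 2
}'
```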
## 5. Troubleshooting

1. Check all containers are alive
@@ -74,13 +74,30 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
entrypoint: python local_reranking.py
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
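The newly added `tei-reranking-service` can be checked on its mapped port (8808) using TEI's `/rerank` route; a minimal sketch with illustrative inputs:

```bash
# Sanity check for the tei-reranking-service added above
curl http://${host_ip}:8808/rerank \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query": "What is deep learning?", "texts": ["Deep learning is a subset of machine learning.", "A dog sat on the mat."]}'
```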
@@ -80,13 +80,26 @@ Retrieval from KnowledgeBase

```bash
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}'

# expected output
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```

**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request; the parameters that can be changed are listed below.

1. retriever
   * search_type: str = "similarity"
   * k: int = 4
   * distance_threshold: Optional[float] = None
   * fetch_k: int = 20
   * lambda_mult: float = 0.5
   * score_threshold: float = 0.2

2. reranker
   * top_n: int = 1

## 5. Troubleshooting

1. Check all containers are alive
@@ -28,7 +28,7 @@ services:
      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
    image: ghcr.io/huggingface/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
@@ -77,13 +77,30 @@ services:
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    entrypoint: python local_reranking.py
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}

@@ -35,9 +35,3 @@ services:
      dockerfile: comps/dataprep/redis/langchain/Dockerfile
    extends: doc-index-retriever
    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
  tei-gaudi:
    build:
      context: tei-gaudi
      dockerfile: Dockerfile-hpu
    extends: doc-index-retriever
    image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}

71
DocIndexRetriever/tests/test.py
Normal file
@@ -0,0 +1,71 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse

import requests


def search_knowledge_base(query: str, url: str, request_type="chat_completion") -> str:
    """Search the knowledge base for a specific query."""
    print(url)
    proxies = {"http": ""}
    if request_type == "chat_completion":
        print("Sending chat completion request")
        payload = {
            "messages": query,
            "k": 5,
            "top_n": 2,
        }
    else:
        print("Sending text request")
        payload = {
            "text": query,
        }
    response = requests.post(url, json=payload, proxies=proxies)
    print(response)
    if "documents" in response.json():
        docs = response.json()["documents"]
        context = ""
        for i, doc in enumerate(docs):
            if i == 0:
                context = str(i) + ": " + doc
            else:
                context += "\n" + str(i) + ": " + doc
        # print(context)
        return context
    elif "text" in response.json():
        return response.json()["text"]
    elif "reranked_docs" in response.json():
        docs = response.json()["reranked_docs"]
        context = ""
        for i, doc in enumerate(docs):
            if i == 0:
                context = doc["text"]
            else:
                context += "\n" + doc["text"]
        # print(context)
        return context
    else:
        return "Error parsing response from the knowledge base."


def main():
    parser = argparse.ArgumentParser(description="Index data")
    parser.add_argument("--host_ip", type=str, default="localhost", help="Host IP")
    parser.add_argument("--port", type=int, default=8889, help="Port")
    parser.add_argument("--request_type", type=str, default="chat_completion", help="Test type")
    args = parser.parse_args()
    print(args)

    host_ip = args.host_ip
    port = args.port
    url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port)

    response = search_knowledge_base("OPEA", url, request_type=args.request_type)

    print(response)


if __name__ == "__main__":
    main()
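
For reference, two minimal invocations of this script against a local deployment; any `--request_type` value other than `chat_completion` takes the plain-text branch above:

```bash
python test.py --host_ip localhost --port 8889 --request_type chat_completion
python test.py --host_ip localhost --port 8889 --request_type text
```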
@@ -19,14 +19,12 @@ function build_docker_images() {
    if [ ! -d "GenAIComps" ] ; then
        git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
    fi
    if [ ! -d "tei-gaudi" ] ; then
        git clone https://github.com/huggingface/tei-gaudi
    fi

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull redis/redis-stack:7.2.0-v9
    docker pull ghcr.io/huggingface/tei-gaudi:latest
    docker images && sleep 1s
}

@@ -66,7 +64,7 @@ function validate() {
}

function validate_megaservice() {
    echo "Testing DataPrep Service"
    echo "=========Ingest data=================="
    local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \
        -H "Content-Type: multipart/form-data" \
        -F 'link_list=["https://opea.dev"]')
@@ -80,7 +78,7 @@ function validate_megaservice() {
    fi

    # Curl the Mega Service
    echo "Testing retriever service"
    echo "==============Testing retriever service: Text Request================="
    local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
        "text": "Explain the OPEA project?"
    }')
@@ -95,6 +93,21 @@ function validate_megaservice() {
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        exit 1
    fi

    echo "==============Testing retriever service: ChatCompletion Request================"
    cd $WORKPATH/tests
    local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-gaudi")
    echo "$EXIT_CODE"
    local EXIT_CODE="${EXIT_CODE:0-1}"
    echo "return value is $EXIT_CODE"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs tei-embedding-gaudi-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        exit 1
    fi
}

function stop_docker() {

@@ -63,8 +63,8 @@ function validate() {
}

function validate_megaservice() {
    echo "Testing DataPrep Service"
    local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \
    echo "===========Ingest data=================="
    local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \
        -H "Content-Type: multipart/form-data" \
        -F 'link_list=["https://opea.dev"]')
    local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")
@@ -77,16 +77,32 @@ function validate_megaservice() {
    fi

    # Curl the Mega Service
    echo "Testing retriever service"
    echo "================Testing retriever service: Default params================"

    local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
        "text": "Explain the OPEA project?"
        "messages": "Explain the OPEA project?"
    }')
    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
    echo "$EXIT_CODE"
    local EXIT_CODE="${EXIT_CODE:0-1}"
    echo "return value is $EXIT_CODE"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs tei-embedding-xeon-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        exit 1
    fi

    echo "================Testing retriever service: ChatCompletion Request================"
    cd $WORKPATH/tests
    local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
    echo "$EXIT_CODE"
    local EXIT_CODE="${EXIT_CODE:0-1}"
    echo "return value is $EXIT_CODE"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log

@@ -21,7 +21,7 @@ Currently we support two ways of deploying Document Summarization services with
   docker pull opea/docsum:latest
   ```

2. Start services using the docker images `built from source`: [Guide](./docker_compose)
2. Start services using the docker images `built from source`: [Guide](https://github.com/opea-project/GenAIExamples/tree/main/DocSum/docker_compose)

### Required Models

@@ -98,7 +98,7 @@ Refer to [Kubernetes deployment](./kubernetes/intel/README.md)

Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.

Refer to the [DocSum helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum) for instructions on deploying DocSum into Kubernetes on Xeon & Gaudi.
Refer to the [DocSum helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) for instructions on deploying DocSum into Kubernetes on Xeon & Gaudi.
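
As a minimal sketch, assuming the chart is installed from a local clone of GenAIInfra and that it accepts the token via `global.HUGGINGFACEHUB_API_TOKEN` (the release name and value names here are illustrative, not confirmed by this diff):

```bash
# Clone the infra repo and install the DocSum chart from the local path
git clone https://github.com/opea-project/GenAIInfra.git
helm install docsum GenAIInfra/helm-charts/docsum \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
```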
### Workflow of the deployed Document Summarization Service

@@ -143,7 +143,7 @@ Two ways of consuming Document Summarization Service:

## Troubleshooting

1. If you get errors like "Access Denied", [validate microservices](https://github.com/opea-project/GenAIExamples/tree/main/DocSum/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate microservices](https://github.com/opea-project/GenAIExamples/tree/main/DocSum/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:

   ```bash
   http_proxy=""
   ```

Some files were not shown because too many files have changed in this diff.