Compare commits
15 Commits
v0.9rc...update_ima

| Author | SHA1 | Date |
| --- | --- | --- |
|  | 6a705ad3d4 |  |
|  | 32afb6501c |  |
|  | 035f39f0d9 |  |
|  | 6f3e54a22a |  |
|  | 1874dfd148 |  |
|  | 7a0fca73e6 |  |
|  | beda609b4b |  |
|  | 993688ac91 |  |
|  | 5fde666c43 |  |
|  | 4133757642 |  |
|  | 10c81f1c57 |  |
|  | dad8eb4b82 |  |
|  | af21e94a29 |  |
|  | f78aa9ee2f |  |
|  | c25063f4bb |  |
.github/workflows/manual-bom-scan.yml (3 changes, vendored)
@@ -59,6 +59,7 @@ jobs:
- name: SBOM Scan Container
uses: anchore/sbom-action@v0.17.1
if: always()
with:
image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output-file: ${{ matrix.image }}-sbom-scan.txt

@@ -66,6 +67,7 @@ jobs:
- name: Security Scan Container
uses: aquasecurity/trivy-action@0.24.0
if: always()
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-trivy-scan.txt

@@ -80,6 +82,7 @@ jobs:
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
if: always()
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-*-scan.txt
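For reference, the two scan steps above can be reproduced locally with the same engines the actions wrap; a rough sketch (the image name is a placeholder, and it assumes the `syft` and `trivy` CLIs are installed):

```bash
# SBOM scan with syft, the engine behind anchore/sbom-action (image name is illustrative)
syft opea/chatqna:latest -o table > chatqna-sbom-scan.txt

# Vulnerability scan with trivy, the engine behind aquasecurity/trivy-action
trivy image --output chatqna-trivy-scan.txt opea/chatqna:latest
```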
.github/workflows/manual-docker-publish.yml (34 changes, vendored)
@@ -5,28 +5,28 @@ name: Examples publish docker image on manual event
on:
workflow_dispatch:
inputs:
nodes:
node:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
default: "Translation"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
publish:
gmc:
default: false
description: 'Publish images to docker hub'
description: 'Publish gmc images'
required: false
type: boolean
tag:
default: "v0.9"
description: "Tag to publish"
required: true
type: string
publish_tags:
default: "latest,v1.0"
default: "latest,v0.9"
description: 'Tag list apply to publish images'
required: false
type: string

@@ -34,7 +34,7 @@ on:
permissions: read-all
jobs:
get-image-list:
runs-on: ${{ inputs.node }}
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:

@@ -44,13 +44,20 @@ jobs:
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
echo ${example}
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
if [ "${{ inputs.gmc }}" == "true" ]; then
image_list=$(echo ${image_list} | jq -c '. + ["gmcmanager","gmcrouter"]')
fi
echo $image_list
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT

publish:

@@ -60,6 +67,11 @@ jobs:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-${{ inputs.node }}"
steps:
- uses: docker/login-action@v3.2.0
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
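The Set Matrix step above builds one deduplicated JSON array of image names by unioning the keys found in each example's docker_build_compose.yaml. A small sketch of the jq call it relies on (the two input arrays are made up for illustration):

```bash
# Union two JSON arrays of image names, mirroring the Set Matrix step (inputs are made up)
image_list='["chatqna","embedding-tei"]'
images='["chatqna","llm-tgi"]'
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
echo "matrix=$(echo ${image_list} | jq -c '.')"
# -> matrix=["chatqna","embedding-tei","llm-tgi"]
```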
.github/workflows/manual-freeze-images.yml (43 changes, vendored)
@@ -1,43 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Freeze base images and 3rd party images on manual event

on:
workflow_dispatch:

jobs:
freeze-images:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}

- uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: install skopeo
run: |
sudo apt update
sudo apt -y install skopeo

- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git

- name: Run script
run: |
bash .github/workflows/scripts/freeze_images.sh

- name: Commit changes
run: |
git add .
git commit -s -m "Freeze third party images tag"
git push
@@ -4,14 +4,18 @@
# SPDX-License-Identifier: Apache-2.0

declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"

function get_latest_version() {
repo_image=$1
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
if [[ $repo_image == *"huggingface"* ]]; then
revision=$(skopeo inspect --config ${dict[$repo_image]} | jq -r '.config.Labels["org.opencontainers.image.revision"][:7]')
latest_version="sha-$revision-intel-cpu"
else
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
fi
echo "latest version: $latest_version"
replace_image_version $repo_image $latest_version
}

@@ -22,10 +26,10 @@ function replace_image_version() {
if [[ -z "$version" ]]; then
echo "version is empty"
else
echo "replace $repo_image:latest with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:latest.*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
echo "replace $repo_image:tag with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
fi
}
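One of the sed patterns above matches `sha[A-Za-z0-9\-]*`, so an already-pinned TGI tag gets rewritten in place rather than only `latest` tags. A quick way to see that expression in action (the tags below are illustrative only):

```bash
# Illustrative only: the sha pattern rewrites an existing sha-pinned tag in place
repo_image="ghcr.io/huggingface/text-generation-inference"
version="sha-8f99f16-intel-cpu"
echo "image: ${repo_image}:sha-e4201f4-intel-cpu" \
  | sed "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
# -> image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
```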
.github/workflows/weekly-update-images.yml (54 changes, vendored, new file)
@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Weekly update base images and 3rd party images

on:
schedule:
- cron: "0 0 * * 0"
workflow_dispatch:

permissions:
contents: write
pull-requests: write

jobs:
freeze-images:
runs-on: ubuntu-latest
env:
USER_NAME: "NeuralChatBot"
USER_EMAIL: "grp_neural_chat_bot@intel.com"
BRANCH_NAME: "update_images_tag"
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: "main"

- name: Install skopeo
run: |
sudo apt update
sudo apt -y install skopeo

- name: Set up Git
run: |
git config --global user.name ${{ env.USER_NAME }}
git config --global user.email ${{ env.USER_EMAIL }}
git remote set-url origin https://${{ env.USER_NAME }}:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
git checkout -b ${{ env.BRANCH_NAME }}

- name: Run script
run: |
bash .github/workflows/scripts/update_images_tag.sh

- name: Commit changes
run: |
git add .
git commit -s -m "Update third party images tag"
git push --set-upstream origin update_images_tag

- name: create pull request
run: gh pr create -B main -H ${{ env.BRANCH_NAME }} --title 'Update ghcr.io/huggingface/text-generation-inference image tag' --body 'Created by Github action'
env:
GITHUB_TOKEN: ${{ secrets.ACTION_TOKEN }}
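Besides the Sunday cron, the `workflow_dispatch` trigger means the update can also be kicked off on demand; with the GitHub CLI that looks roughly like this (assuming `gh` is authenticated against the repository):

```bash
# Trigger the weekly image-update workflow manually
gh workflow run weekly-update-images.yml --ref main

# Check the run it started
gh run list --workflow=weekly-update-images.yml --limit 1
```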
@@ -26,50 +26,50 @@ This example showcases a hierarchical multi-agent system for question-answering

1. Build agent docker image </br>
First, clone the opea GenAIComps repo

```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIComps.git
```

Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.

```
cd GenAIComps
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
```

2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

3. Set up environment for this example </br>
First, clone this repo

```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```

Second, set up env vars

```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPANAI_API_KEY
export OPENAI_API_KEY=<your-openai-key>
```

4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use openai llm, run command below.

```
cd docker/openai/
bash launch_agent_service_openai.sh
```

## Validate services
@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git

RUN useradd -m -s /bin/bash user && \
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007

```bash
cd GenAIExamples/AudioQnA/docker/gaudi/
TAG=v0.9 docker compose up -d
docker compose up -d
```

## 🚀 Test MicroServices
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007

```bash
cd GenAIExamples/AudioQnA/docker/xeon/
TAG=v0.9 docker compose up -d
docker compose up -d
```

## 🚀 Test MicroServices
@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
@@ -15,19 +15,19 @@ The AudioQnA application is defined as a Custom Resource (CR) file that the abov
The AudioQnA uses the below prebuilt images if you choose a Xeon deployment

- tgi-service: ghcr.io/huggingface/text-generation-inference:1.4
- llm: opea/llm-tgi:v0.9
- asr: opea/asr:v0.9
- whisper: opea/whisper:v0.9
- tts: opea/tts:v0.9
- speecht5: opea/speecht5:v0.9
- llm: opea/llm-tgi:latest
- asr: opea/asr:latest
- whisper: opea/whisper:latest
- tts: opea/tts:latest
- speecht5: opea/speecht5:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
- whisper-gaudi: opea/whisper-gaudi:v0.9
- speecht5-gaudi: opea/speecht5-gaudi:v0.9
- whisper-gaudi: opea/whisper-gaudi:latest
- speecht5-gaudi: opea/speecht5-gaudi:latest

> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
@@ -66,7 +66,7 @@ This involves deploying the AudioQnA custom resource. You can use audioQnA_xeon.

```sh
export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n audioqa -- curl $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n audioqa -- curl -s --no-buffer $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
```

> [NOTE]
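The `byte_str` in the request above is just a base64-encoded WAV clip; to test with your own audio, something like the following can produce the payload (a sketch that assumes a short `sample.wav` and GNU coreutils `base64`):

```bash
# Encode a short WAV file into the base64 string used as "byte_str"
base64 -w 0 sample.wav > sample.b64
# Paste the contents of sample.b64 into the request body shown above
```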
@@ -50,7 +50,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/asr:v0.9
|
||||
image: opea/asr:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: asr-deploy
|
||||
args: null
|
||||
@@ -101,7 +101,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/whisper-gaudi:v0.9
|
||||
image: opea/whisper-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: whisper-deploy
|
||||
args: null
|
||||
@@ -164,7 +164,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/tts:v0.9
|
||||
image: opea/tts:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: tts-deploy
|
||||
args: null
|
||||
@@ -215,7 +215,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/speecht5-gaudi:v0.9
|
||||
image: opea/speecht5-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: speecht5-deploy
|
||||
args: null
|
||||
@@ -365,7 +365,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
@@ -416,7 +416,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/audioqna:v0.9
|
||||
image: opea/audioqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: audioqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -50,7 +50,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/asr:v0.9
|
||||
image: opea/asr:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: asr-deploy
|
||||
args: null
|
||||
@@ -101,7 +101,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/whisper:v0.9
|
||||
image: opea/whisper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: whisper-deploy
|
||||
args: null
|
||||
@@ -152,7 +152,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/tts:v0.9
|
||||
image: opea/tts:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: tts-deploy
|
||||
args: null
|
||||
@@ -203,7 +203,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/speecht5:v0.9
|
||||
image: opea/speecht5:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: speecht5-deploy
|
||||
args: null
|
||||
@@ -321,7 +321,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
@@ -372,7 +372,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/audioqna:v0.9
|
||||
image: opea/audioqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: audioqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -161,7 +161,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gaudi/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -174,7 +174,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/xeon/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
@@ -183,7 +183,7 @@ Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on buil
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gpu/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:v0.9
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:v0.9
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:v0.9
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
|
||||
@@ -31,7 +31,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:v0.9
|
||||
image: opea/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:v0.9
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:v0.9
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:v0.9
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:v0.9
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:v0.9
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
|
||||
@@ -31,7 +31,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:v0.9
|
||||
image: opea/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:v0.9
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:v0.9
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:v0.9
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:v0.9
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:v0.9
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
|
||||
@@ -31,7 +31,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:v0.9
|
||||
image: opea/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:v0.9
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:v0.9
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -160,11 +160,11 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc

```bash
cd GenAIExamples/ChatQnA/docker/aipc/
TAG=v0.9 docker compose up -d
docker compose up -d

# let ollama service runs
# e.g. ollama run llama3
ollama run $OLLAMA_MODEL
OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
# for windows
# ollama run %OLLAMA_MODEL%
```
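With Ollama bound to the host IP as above, a quick reachability check from another machine or container is to query its REST API (assuming the default 11434 port shown in this setup):

```bash
# List the models the Ollama server is currently serving
curl http://${host_ip}:11434/api/tags
```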
@@ -211,26 +211,26 @@ cd GenAIExamples/ChatQnA/docker/gaudi/
|
||||
If use tgi for llm backend.
|
||||
|
||||
```bash
|
||||
TAG=v0.9 docker compose -f compose.yaml up -d
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
If use vllm for llm backend.
|
||||
|
||||
```bash
|
||||
TAG=v0.9 docker compose -f compose_vllm.yaml up -d
|
||||
docker compose -f compose_vllm.yaml up -d
|
||||
```
|
||||
|
||||
If use vllm-on-ray for llm backend.
|
||||
|
||||
```bash
|
||||
TAG=v0.9 docker compose -f compose_vllm_ray.yaml up -d
|
||||
docker compose -f compose_vllm_ray.yaml up -d
|
||||
```
|
||||
|
||||
If you want to enable guardrails microservice in the pipeline, please follow the below command instead:
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gaudi/
|
||||
TAG=v0.9 docker compose -f compose_guardrails.yaml up -d
|
||||
docker compose -f compose_guardrails.yaml up -d
|
||||
```
|
||||
|
||||
> **_NOTE:_** Users need at least two Gaudi cards to run the ChatQnA successfully.
|
||||
|
||||
@@ -17,7 +17,7 @@ start the docker containers
|
||||
|
||||
```
|
||||
cd ./GenAIExamples/ChatQnA/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Check the start up log by `docker compose -f ./docker/gaudi/compose.yaml logs`.
|
||||
@@ -149,7 +149,7 @@ Set the LLM_MODEL_ID then restart the containers.
|
||||
Also you can check overall logs with the following command, where the compose.yaml is the mega service docker-compose configuration file.
|
||||
|
||||
```
|
||||
TAG=v0.9 docker compose -f ./docker-composer/gaudi/compose.yaml logs
|
||||
docker compose -f ./docker-composer/gaudi/compose.yaml logs
|
||||
```
|
||||
|
||||
## 4. Check each micro service used by the Mega Service
|
||||
|
||||
@@ -121,7 +121,7 @@ Note: Please replace with `host_ip` with you external IP address, do **NOT** use
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gpu/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate MicroServices and MegaService
|
||||
|
||||
@@ -1,7 +1,7 @@
CHAT_BASE_URL = 'http://backend_address:8888/v1/chatqna'
CHAT_BASE_URL = '/v1/chatqna'

UPLOAD_FILE_BASE_URL = 'http://backend_address:6007/v1/dataprep'
UPLOAD_FILE_BASE_URL = '/v1/dataprep'

GET_FILE = 'http://backend_address:6007/v1/dataprep/get_file'
GET_FILE = '/v1/dataprep/get_file'

DELETE_FILE = 'http://backend_address:6007/v1/dataprep/delete_file'
DELETE_FILE = '/v1/dataprep/delete_file'
@@ -226,13 +226,13 @@ cd GenAIExamples/ChatQnA/docker/xeon/
If use TGI backend.

```bash
TAG=v0.9 docker compose -f compose.yaml up -d
docker compose -f compose.yaml up -d
```

If use vLLM backend.

```bash
TAG=v0.9 docker compose -f compose_vllm.yaml up -d
docker compose -f compose_vllm.yaml up -d
```

### Validate Microservices
@@ -205,7 +205,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/xeon/
|
||||
TAG=v0.9 docker compose -f compose_qdrant.yaml up -d
|
||||
docker compose -f compose_qdrant.yaml up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -103,7 +103,7 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "9009:80"
|
||||
|
||||
@@ -102,7 +102,7 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "6042:80"
|
||||
|
||||
@@ -70,7 +70,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "9009:80"
|
||||
|
||||
@@ -16,18 +16,18 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment

- redis-vector-db: redis/redis-stack:7.2.0-v9
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- embedding: opea/embedding-tei:v0.9
- retriever: opea/retriever-redis:v0.9
- embedding: opea/embedding-tei:latest
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:v0.9
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- llm: opea/llm-tgi:v0.9
- chaqna-xeon-backend-server: opea/chatqna:v0.9
- reranking: opea/reranking-tei:latest
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tei-embedding-service: opea/tei-gaudi:v0.9
- tei-embedding-service: opea/tei-gaudi:latest
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1

> [NOTE]

@@ -67,7 +67,7 @@ This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.ya
```sh
export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```

6. Perhaps you want to try another LLM model? Just modify the application custom resource to use another LLM model

@@ -98,7 +98,7 @@ This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.ya
9. Access the updated pipeline using the same URL from above using the client pod

```sh
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```

> [NOTE]
@@ -501,7 +501,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/dataprep-redis:v0.9"
|
||||
image: "opea/dataprep-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: data-prep
|
||||
@@ -579,7 +579,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:v0.9"
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
@@ -657,7 +657,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -807,7 +807,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:v0.9"
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
@@ -885,7 +885,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/retriever-redis:v0.9"
|
||||
image: "opea/retriever-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: retriever-usvc
|
||||
@@ -1212,7 +1212,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/chatqna:v0.9"
|
||||
image: "opea/chatqna:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -500,7 +500,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/dataprep-redis:v0.9"
|
||||
image: "opea/dataprep-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: data-prep
|
||||
@@ -578,7 +578,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:v0.9"
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
@@ -656,7 +656,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -806,7 +806,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:v0.9"
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
@@ -884,7 +884,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/retriever-redis:v0.9"
|
||||
image: "opea/retriever-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: retriever-usvc
|
||||
@@ -1122,7 +1122,7 @@ spec:
|
||||
name: chatqna-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -1209,7 +1209,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/chatqna:v0.9"
|
||||
image: "opea/chatqna:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -71,7 +71,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -84,7 +84,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -10,7 +10,6 @@ ENV LANG=C.UTF-8
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -103,7 +103,7 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate the MicroServices and MegaService
|
||||
|
||||
@@ -106,7 +106,7 @@ Note: Please replace the `host_ip` with you external IP address, do not use `loc
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate the MicroServices and MegaService
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "8028:80"
|
||||
|
||||
@@ -36,5 +36,5 @@ In the below example we illustrate on Xeon.
|
||||
```bash
|
||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl -s --no-buffer $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||
```
|
||||
|
||||
@@ -170,7 +170,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -326,7 +326,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:v0.9"
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -169,7 +169,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -239,7 +239,7 @@ spec:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -322,7 +322,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:v0.9"
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -126,7 +126,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -179,7 +179,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -230,7 +230,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/codegen:v0.9"
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: codegen
|
||||
@@ -273,7 +273,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/codegen-react-ui:v0.9"
|
||||
image: "opea/codegen-react-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: react-ui
|
||||
|
||||
@@ -57,7 +57,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -70,7 +70,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -62,7 +62,7 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -70,7 +70,7 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: codetrans-tgi-service
|
||||
ports:
|
||||
- "8008:80"
|
||||
|
||||
@@ -170,7 +170,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -324,7 +324,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codetrans:v0.9"
|
||||
image: "opea/codetrans:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -169,7 +169,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -239,7 +239,7 @@ spec:
|
||||
name: codetrans-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -322,7 +322,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codetrans:v0.9"
|
||||
image: "opea/codetrans:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -8,7 +8,6 @@ COPY GenAIComps /home/user/GenAIComps
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
@@ -27,4 +26,4 @@ USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "retrieval_tool.py"]
|
||||
ENTRYPOINT ["python", "retrieval_tool.py"]
|
||||
|
||||
@@ -1,36 +1,36 @@
# DocRetriever Application
# DocRetriever Application with Docker

DocRetriever are the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.

### 1. Build Images for necessary microservices. (This step will not needed after docker image released)
## 1. Build Images for necessary microservices. (This step will not needed after docker image released)

- Embedding TEI Image

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```

- Retriever Vector store Image

```bash
docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile .
```

- Rerank TEI Image

```bash
docker build -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
```

- Dataprep Image

```bash
docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile .
```

### 2. Build Images for MegaService
## 2. Build Images for MegaService

```bash
cd ..
@@ -38,7 +38,7 @@ git clone https://github.com/opea-project/GenAIExamples.git
docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f GenAIExamples/DocIndexRetriever/docker/Dockerfile .
```

### 3. Start all the services Docker Containers
## 3. Start all the services Docker Containers

```bash
export host_ip="YOUR IP ADDR"
@@ -59,10 +59,10 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export llm_hardware='xeon' #xeon, xpu, gaudi
cd GenAIExamples/DocIndexRetriever/docker/${llm_hardware}/
TAG=v0.9 docker compose -f docker-compose.yaml up -d
docker compose -f docker-compose.yaml up -d
```

### 3. Validation
## 3. Validation

Add Knowledge Base via HTTP Links:
@@ -86,41 +86,41 @@ curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: applicati
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```

### 4. Trouble shooting
## 4. Trouble shooting

1. check all containers are alive

```bash
# redis vector store
docker container logs redis-vector-db
# dataprep to redis microservice, input document files
docker container logs dataprep-redis-server

# embedding microservice
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"Explain the OPEA project"}' \
-H 'Content-Type: application/json' > query
docker container logs embedding-tei-server

# if you used tei-gaudi
docker container logs tei-embedding-gaudi-server

# retriever microservice, input embedding output docs
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d @query \
-H 'Content-Type: application/json' > rerank_query
docker container logs retriever-redis-server

# reranking microservice
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d @rerank_query \
-H 'Content-Type: application/json' > output
docker container logs reranking-tei-server

# megaservice gateway
docker container logs doc-index-retriever-server
```
@@ -58,7 +58,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/gaudi/
|
||||
TAG=v0.9 docker compose -f compose.yaml up -d
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -71,7 +71,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/xeon/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -73,7 +73,7 @@ export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
@@ -86,7 +86,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
||||
|
||||
@@ -82,7 +82,7 @@ export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
@@ -95,7 +95,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```

### Validate Microservices

@@ -3,7 +3,7 @@

services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8008:80"

@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks whether the microservices listed in the CR yaml file are running; if not, it starts them and then connects them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. If you run the "kubectl get pods" command you will see all the component microservices, in particular embedding, retriever, rerank, and llm.

The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image llm-docsum-tgi:latest which internally leverages
the image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
the image ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
service tgi-gaudi-svc, which uses the image ghcr.io/huggingface/tgi-gaudi:1.2.1. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use Intel/neural-chat-7b-v3-3.
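
For example, exporting that variable before deploying the pipeline can be as simple as:

```bash
# Serve the model referenced in the example above from both TGI services.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```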

[NOTE]
@@ -60,7 +60,7 @@ This involves deploying the application pipeline custom resource. You can use do
```bash
export CLIENT_POD=$(kubectl get pod -n ${ns} -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $ns -o jsonpath="{.items[?(@.metadata.name=='docsum')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $ns -- curl $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n $ns -- curl -s --no-buffer $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
```

7. Clean up. Use standard Kubernetes custom resource remove commands. Confirm the clean-up by listing the pods in the application namespace.
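
A minimal sketch of that clean-up, assuming the CR was applied from a manifest named `docsum_xeon.yaml` (the file name is only a placeholder for whichever manifest you applied):

```bash
# Remove the DocSum custom resource and its pipeline (substitute your actual CR file).
kubectl delete -f docsum_xeon.yaml -n $ns
# Verify that the component pods have been removed.
kubectl get pods -n $ns
```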

@@ -170,7 +170,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -324,7 +324,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -169,7 +169,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -239,7 +239,7 @@ spec:
name: docsum-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -322,7 +322,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -180,7 +180,7 @@ spec:
value:

securityContext: {}
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -231,7 +231,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
ports:
- name: docsum
@@ -274,7 +274,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/docsum-react-ui:v0.9"
image: "opea/docsum-react-ui:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui

@@ -9,7 +9,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git

RUN useradd -m -s /bin/bash user && \

@@ -86,7 +86,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo

```bash
cd GenAIExamples/FaqGen/docker/gaudi
TAG=v0.9 docker compose up -d
docker compose up -d
```

### Validate Microservices

@@ -85,7 +85,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo

```bash
cd GenAIExamples/FaqGen/docker/xeon
TAG=v0.9 docker compose up -d
docker compose up -d
```

### Validate Microservices

@@ -3,7 +3,7 @@

services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-xeon-server
ports:
- "8008:80"

@@ -117,7 +117,7 @@ spec:
value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
image: opea/llm-faqgen-tgi:v0.9
image: opea/llm-faqgen-tgi:latest
imagePullPolicy: IfNotPresent
args: null
ports:
@@ -166,7 +166,7 @@ spec:
value: faq-mega-server-svc
- name: MEGA_SERVICE_PORT
value: "7777"
image: opea/faqgen:v0.9
image: opea/faqgen:latest
imagePullPolicy: IfNotPresent
args: null
ports:

@@ -24,7 +24,7 @@ spec:
env:
- name: DOC_BASE_URL
value: http://{insert_your_ip_here}:7779/v1/faqgen
image: opea/faqgen-ui:v0.9
image: opea/faqgen-ui:latest
imagePullPolicy: IfNotPresent
args: null
ports:

@@ -96,7 +96,7 @@ spec:
value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
image: opea/llm-faqgen-tgi:v0.9
image: opea/llm-faqgen-tgi:latest
imagePullPolicy: IfNotPresent
args: null
ports:
@@ -145,7 +145,7 @@ spec:
value: faq-mega-server-cpu-svc
- name: MEGA_SERVICE_PORT
value: "7777"
image: opea/faqgen:v0.9
image: opea/faqgen:latest
imagePullPolicy: IfNotPresent
args: null
ports:

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -179,7 +179,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "opea/llm-faqgen-tgi:v0.9"
image: "opea/llm-faqgen-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -230,7 +230,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/faqgen:v0.9"
image: "opea/faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: faqgen
@@ -273,7 +273,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/faqgen-react-ui:v0.9"
image: "opea/faqgen-react-ui:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui

@@ -2,7 +2,7 @@

OPEA Productivity Suite is a powerful tool designed to streamline your workflow and boost productivity. This application leverages the cutting-edge OPEA microservices to provide a comprehensive suite of features that cater to the diverse needs of modern enterprises.

### Key Features
## Key Features

- Chat with Documents: Engage in intelligent conversations with your documents using our advanced RAG Capabilities. Our Retrieval-Augmented Generation (RAG) model allows you to ask questions, receive relevant information, and gain insights from your documents in real-time.

@@ -1,66 +1,72 @@
<h1 align="center" id="title"> Productivity Suite React UI</h1>
# Productivity Suite React UI

### 📸 Project Screenshots
## 📸 Project Screenshots

![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/Login_page.png)

<h2>🧐 Features</h2>
## 🧐 Features

Here are some of the project's features:

#### CHAT QNA
### CHAT QNA

- Start a Text Chat: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.

##### DATA SOURCE
#### DATA SOURCE

- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.
- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.

###### Screen Shot
##### Screen Shot

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
###### Screen Shots
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/chatqna_with_conversation.png)

#### CODEGEN
##### Screen Shots

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

### CODEGEN

- Generate code: generate the corresponding code based on the current user's input.
###### Screen Shot
![project-screenshot](../../assets/img/codegen.png)

#### DOC SUMMARY
### DOC SUMMARY

- Summarizing Uploaded Files: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed in the 'Summary' box.
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
###### Screen Shot
![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

#### FAQ Generator
#### Screen Shot

![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

### FAQ Generator

- Generate FAQs from Text via Pasting: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.

- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
###### Screen Shot
![project-screenshot](../../assets/img/faq_generator.png)

<h2>🛠️ Get it Running:</h2>
#### Screen Shot

![project-screenshot](../../assets/img/faq_generator.png)

## 🛠️ Get it Running:

1. Clone the repo.

2. `cd` into the current folder.

3. Create a `.env` file and add the following variables and values.
```env
```
VITE_BACKEND_SERVICE_ENDPOINT_CHATQNA=''
VITE_BACKEND_SERVICE_ENDPOINT_CODEGEN=''
VITE_BACKEND_SERVICE_ENDPOINT_DOCSUM=''

@@ -63,7 +63,7 @@ cd ..

The Productivity Suite is composed of multiple GenAIExample reference solutions working together.

### 8.1 Build ChatQnA MegaService Docker Images
#### 8.1 Build ChatQnA MegaService Docker Images

```bash
git clone https://github.com/opea-project/GenAIExamples.git
@@ -72,7 +72,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```

### 8.2 Build DocSum Megaservice Docker Images
#### 8.2 Build DocSum Megaservice Docker Images

```bash
cd GenAIExamples/DocSum/docker
@@ -80,7 +80,7 @@ docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_pro
cd ../../..
```

### 8.3 Build CodeGen Megaservice Docker Images
#### 8.3 Build CodeGen Megaservice Docker Images

```bash
cd GenAIExamples/CodeGen/docker
@@ -88,7 +88,7 @@ docker build --no-cache -t opea/codegen:latest --build-arg https_proxy=$https_pr
cd ../../..
```

### 8.4 Build FAQGen Megaservice Docker Images
#### 8.4 Build FAQGen Megaservice Docker Images

```bash
cd GenAIExamples/FaqGen/docker
@@ -195,7 +195,7 @@ cd GenAIExamples/ProductivitySuite/docker/xeon/
```

```bash
TAG=v0.9 docker compose -f compose.yaml up -d
docker compose -f compose.yaml up -d
```

### Setup Keycloak
@@ -206,7 +206,7 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail

1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
```bash
curl ${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```

2. Embedding Microservice

```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```

3. Retriever Microservice

To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.

Check the vector dimension of your embedding model and set the `your_embedding` dimension to match.
Check the vector dimension of your embedding model and set the `your_embedding` dimension to match.

```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```

4. TEI Reranking Service

```bash
curl http://${host_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```

5. Reranking Microservice

```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```

6. LLM backend Service (ChatQnA, DocSum, FAQGen)

```bash
curl http://${host_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```

8. LLM backend Service (CodeGen)
7. LLM backend Service (CodeGen)

```bash
curl http://${host_ip}:8028/generate \
-X POST \
-d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:8028/generate \
-X POST \
-d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \
-H 'Content-Type: application/json'
```

9. ChatQnA LLM Microservice
8. ChatQnA LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```

10. CodeGen LLM Microservice
9. CodeGen LLM Microservice

```bash
curl http://${host_ip}:9001/v1/chat/completions\
@@ -498,50 +498,56 @@ Here is an example of running Productivity Suite
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/Login_page.png)

<h2>🧐 Features</h2>
## 🧐 Features

Here are some of the project's features:

#### CHAT QNA
### CHAT QNA

- Start a Text Chat: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.

##### DATA SOURCE
### DATA SOURCE

- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.
- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.

###### Screen Shot
#### Screen Shot

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
###### Screen Shots
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/chatqna_with_conversation.png)

#### CODEGEN
#### Screen Shots

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

### CODEGEN

- Generate code: generate the corresponding code based on the current user's input.
###### Screen Shot
![project-screenshot](../../assets/img/codegen.png)

#### DOC SUMMARY
### DOC SUMMARY

- Summarizing Uploaded Files: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed in the 'Summary' box.
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
###### Screen Shot
![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

#### FAQ Generator
#### Screen Shot

![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

### FAQ Generator

- Generate FAQs from Text via Pasting: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.

- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
###### Screen Shot
![project-screenshot](../../assets/img/faq_generator.png)

#### Screen Shot

![project-screenshot](../../assets/img/faq_generator.png)

@@ -22,24 +22,26 @@ To begin with, ensure that you have following prerequisites in place:
1. Kubernetes installation: Make sure that you have Kubernetes installed.
2. Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker/xeon/README.md) for steps to build the images.
3. Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
#### a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
```
# You may set the HUGGINGFACEHUB_API_TOKEN via this method:
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" *.yaml
```

#### b. Set the proxies based on your network configuration
```
# Look for the http_proxy, https_proxy and no_proxy keys and fill in the values for all the yaml files with your system proxy configuration.
```
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
```
# You may set the HUGGINGFACEHUB_API_TOKEN via this method:
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" *.yaml
```

b. Set the proxies based on your network configuration
```
# Look for the http_proxy, https_proxy and no_proxy keys and fill in the values for all the yaml files with your system proxy configuration.
```

c. Set all the backend service endpoints for the REACT UI service
```
# Set up all the backend service endpoints in productivity_suite_reactui.yaml for the UI to consume.
# Look for ENDPOINT in the yaml and insert the url endpoints for all the required backend services.
```

#### c. Set all the backend service endpoints for the REACT UI service
```
# Set up all the backend service endpoints in productivity_suite_reactui.yaml for the UI to consume.
# Look for ENDPOINT in the yaml and insert the url endpoints for all the required backend services.
```
4. MODEL_ID and model-volume (OPTIONAL): You may also customize "MODEL_ID" to use a different model and model-volume for the volume to be mounted (see the sketch after this list).
5. After finishing the steps above, you can proceed with deploying the yaml files.
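
A minimal sketch of where to find those two settings for step 4, assuming the manifests expose the model id and the model cache volume under the names used above:

```bash
# Locate the MODEL_ID values and model-volume definitions to customize before applying the manifests.
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
grep -n "MODEL_ID\|model-volume" *.yaml
```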

@@ -65,7 +65,7 @@ spec:
- configMapRef:
name: chat-history-config
securityContext: null
image: "opea/chathistory-mongo-server:v0.9"
image: "opea/chathistory-mongo-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: chat-history

@@ -499,7 +499,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/dataprep-redis:v0.9"
image: "opea/dataprep-redis:latest"
imagePullPolicy: IfNotPresent
ports:
- name: data-prep
@@ -557,7 +557,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:v0.9"
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
@@ -615,7 +615,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:v0.9"
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -753,7 +753,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:v0.9"
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
@@ -811,7 +811,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/retriever-redis:v0.9"
image: "opea/retriever-redis:latest"
imagePullPolicy: IfNotPresent
ports:
- name: retriever-usvc
@@ -1069,7 +1069,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/chatqna:v0.9"
image: "opea/chatqna:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -171,7 +171,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:v0.9"
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -301,7 +301,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/codegen:v0.9"
image: "opea/codegen:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -171,7 +171,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -301,7 +301,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -183,7 +183,7 @@ spec:
- configMapRef:
name: faqgen-llm-uservice-config
securityContext: {}
image: "opea/llm-faqgen-tgi:v0.9"
image: "opea/llm-faqgen-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -234,7 +234,7 @@ spec:
- name: no_proxy
value: ""
securityContext: null
image: "opea/faqgen:v0.9"
image: "opea/faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: faqgen

@@ -82,7 +82,7 @@ spec:
- name: APP_KEYCLOAK_SERVICE_ENDPOINT
value: ""
securityContext: null
image: "opea/productivity-suite-react-ui-server:v0.9"
image: "opea/productivity-suite-react-ui-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui

@@ -65,7 +65,7 @@ spec:
- configMapRef:
name: prompt-registry-config
securityContext: null
image: "opea/promptregistry-mongo-server:v0.9"
image: "opea/promptregistry-mongo-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: prompt-registry

@@ -69,7 +69,7 @@ If your version of `Habana Driver` < 1.16.0 (check with `hl-smi`), run the follo

```bash
cd GenAIExamples/SearchQnA/docker/gaudi/
TAG=v0.9 docker compose up -d
docker compose up -d
```

> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
@@ -82,7 +82,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).

```bash
cd GenAIExamples/SearchQnA/docker/xeon/
TAG=v0.9 docker compose up -d
docker compose up -d
```

Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git

RUN useradd -m -s /bin/bash user && \

Some files were not shown because too many files have changed in this diff.