Compare commits
103 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4d5972112c | ||
|
|
dab0177432 | ||
|
|
e7b000eca5 | ||
|
|
723fddec79 | ||
|
|
f629702004 | ||
|
|
4f3be23efa | ||
|
|
9657f7bc83 | ||
|
|
ac324a9ec2 | ||
|
|
dfaf47978d | ||
|
|
acbaaf8ff0 | ||
|
|
06cb308611 | ||
|
|
e6b4fff05c | ||
|
|
a54ffd2c1e | ||
|
|
f3ffcd50b3 | ||
|
|
947cbe39b8 | ||
|
|
fbb81b67db | ||
|
|
5d39506c5c | ||
|
|
566cf93c34 | ||
|
|
771975510a | ||
|
|
6674832162 | ||
|
|
67df2804de | ||
|
|
46af6f3bc4 | ||
|
|
343d614591 | ||
|
|
87617e761c | ||
|
|
db2d2bd1a1 | ||
|
|
4fa37e7842 | ||
|
|
c73e4e0f26 | ||
|
|
ba78b4c994 | ||
|
|
01c1b7504f | ||
|
|
c016d8264a | ||
|
|
4fd3517f23 | ||
|
|
503a1a9844 | ||
|
|
08f57fa54a | ||
|
|
5a9c109e35 | ||
|
|
c327972776 | ||
|
|
f45e4c6956 | ||
|
|
5dcadf3d3f | ||
|
|
3363a37197 | ||
|
|
b2771ad3f2 | ||
|
|
e81e0e557c | ||
|
|
71363a6b9d | ||
|
|
a39f23a16e | ||
|
|
c9f9acab61 | ||
|
|
040d2b7fd9 | ||
|
|
6296e9f2fb | ||
|
|
c86cf8536d | ||
|
|
039014fbbf | ||
|
|
1c07a38457 | ||
|
|
e93146b33e | ||
|
|
a6385bc6fd | ||
|
|
c26d0f62b8 | ||
|
|
e71aba0080 | ||
|
|
cfcac3f0ec | ||
|
|
d68be058f5 | ||
|
|
45cf553d36 | ||
|
|
1c23d87aa2 | ||
|
|
64bfea9054 | ||
|
|
0a6bad0ab9 | ||
|
|
4f7fc39d66 | ||
|
|
80e3e2a2d3 | ||
|
|
8c384e0314 | ||
|
|
3c9e2aaffd | ||
|
|
acdd712929 | ||
|
|
c297155bea | ||
|
|
923cf69e63 | ||
|
|
7a67298f19 | ||
|
|
a5ed2233b5 | ||
|
|
e12baca3b8 | ||
|
|
939502dba1 | ||
|
|
a072441c06 | ||
|
|
ed483719a8 | ||
|
|
14621f8492 | ||
|
|
2390920b1d | ||
|
|
02a15366bc | ||
|
|
f08d4115db | ||
|
|
5ac77f78da | ||
|
|
ebc165a6aa | ||
|
|
ad8ca8886e | ||
|
|
88eeb0d7e6 | ||
|
|
e22d41362d | ||
|
|
17b9676a3d | ||
|
|
7dd9952f5e | ||
|
|
06c4484b88 | ||
|
|
3913c7bb36 | ||
|
|
abc02e1332 | ||
|
|
cf021ee009 | ||
|
|
70a50d8b78 | ||
|
|
ab9879508f | ||
|
|
08eb2699b7 | ||
|
|
4259240407 | ||
|
|
8bdb598417 | ||
|
|
ac89855ff8 | ||
|
|
c71bc68c9c | ||
|
|
09a3196324 | ||
|
|
015a2b178b | ||
|
|
33f83293d6 | ||
|
|
076bca3bbf | ||
|
|
83712b9f1b | ||
|
|
704ec9234d | ||
|
|
c461b6081f | ||
|
|
d645305816 | ||
|
|
9277fe6201 | ||
|
|
21fab71f6d |
9
.github/CODEOWNERS
vendored
Normal file
9
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
/AudioQnA/ sihan.chen@intel.com
|
||||
/ChatQnA/ liang1.lv@intel.com
|
||||
/CodeGen/ liang1.lv@intel.com
|
||||
/CodeTrans/ sihan.chen@intel.com
|
||||
/DocSum/ sihan.chen@intel.com
|
||||
/FaqGen/ letong.han@intel.com
|
||||
/SearchQnA/ letong.han@intel.com
|
||||
/Translation/ liang1.lv@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com
|
||||
50
.github/workflows/VisualQnA.yml
vendored
50
.github/workflows/VisualQnA.yml
vendored
@@ -1,50 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: VisualQnA-test
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- VisualQnA/**
|
||||
- "!**.md"
|
||||
- "!**/ui/**"
|
||||
- .github/workflows/VisualQnA.yml
|
||||
workflow_dispatch:
|
||||
|
||||
# If there is a new commit, the previous jobs will be canceled
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
VisualQnA:
|
||||
runs-on: aise-cluster
|
||||
strategy:
|
||||
matrix:
|
||||
job_name: ["basic"]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: "refs/pull/${{ github.event.number }}/merge"
|
||||
|
||||
- name: Run Test
|
||||
env:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
run: |
|
||||
cd ${{ github.workspace }}/VisualQnA/tests
|
||||
bash test_${{ matrix.job_name }}_inference.sh
|
||||
|
||||
- name: Publish pipeline artifact
|
||||
if: ${{ !cancelled() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.job_name }}
|
||||
path: ${{ github.workspace }}/VisualQnA/tests/*.log
|
||||
166
.github/workflows/_example-workflow.yml
vendored
Normal file
166
.github/workflows/_example-workflow.yml
vendored
Normal file
@@ -0,0 +1,166 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Example jobs
|
||||
permissions: read-all
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
node:
|
||||
required: true
|
||||
type: string
|
||||
example:
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
type: string
|
||||
build:
|
||||
default: true
|
||||
required: false
|
||||
type: boolean
|
||||
scan:
|
||||
default: true
|
||||
required: false
|
||||
type: boolean
|
||||
test_compose:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
test_k8s:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
test_gmc:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
opea_branch:
|
||||
default: "main"
|
||||
required: false
|
||||
type: string
|
||||
jobs:
|
||||
####################################################################################################
|
||||
# Image Build
|
||||
####################################################################################################
|
||||
build-images:
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Clone required Repo
|
||||
run: |
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker
|
||||
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
|
||||
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/huggingface/tei-gaudi.git
|
||||
fi
|
||||
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
fi
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
|
||||
|
||||
- name: Build Image
|
||||
if: ${{ fromJSON(inputs.build) }}
|
||||
uses: opea-project/validation/actions/image-build@main
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker
|
||||
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
tag: ${{ inputs.tag }}
|
||||
|
||||
####################################################################################################
|
||||
# Trivy Scan
|
||||
####################################################################################################
|
||||
get-image-list:
|
||||
needs: [build-images]
|
||||
if: ${{ fromJSON(inputs.scan) && inputs.node == 'gaudi' }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.scan-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set Matrix
|
||||
id: scan-matrix
|
||||
run: |
|
||||
pip install yq
|
||||
compose_path=${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
|
||||
echo "matrix=$(cat ${compose_path} | yq -r '.[]' | jq 'keys' | jq -c '.')" >> $GITHUB_OUTPUT
|
||||
|
||||
scan-images:
|
||||
needs: [get-image-list, build-images]
|
||||
if: ${{ fromJSON(inputs.scan) && inputs.node == 'gaudi'}}
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
strategy:
|
||||
matrix:
|
||||
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Pull Image
|
||||
run: |
|
||||
docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
|
||||
|
||||
- name: Scan Container
|
||||
uses: opea-project/validation/actions/trivy-scan@main
|
||||
with:
|
||||
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
output: ${{ matrix.image }}-scan.txt
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
|
||||
- uses: actions/upload-artifact@v4.3.4
|
||||
with:
|
||||
name: ${{ matrix.image }}-scan
|
||||
path: ${{ matrix.image }}-scan.txt
|
||||
overwrite: true
|
||||
|
||||
####################################################################################################
|
||||
# Docker Compose Test
|
||||
####################################################################################################
|
||||
test-example-compose:
|
||||
needs: [build-images]
|
||||
if: ${{ fromJSON(inputs.test_compose) }}
|
||||
uses: ./.github/workflows/_run-docker-compose.yml
|
||||
with:
|
||||
tag: ${{ inputs.tag }}
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
####################################################################################################
|
||||
# K8S Test
|
||||
####################################################################################################
|
||||
test-k8s-manifest:
|
||||
needs: [build-images]
|
||||
if: ${{ fromJSON(inputs.test_k8s) }}
|
||||
uses: ./.github/workflows/_manifest-e2e.yml
|
||||
with:
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
tag: ${{ inputs.tag }}
|
||||
context: "CD"
|
||||
secrets: inherit
|
||||
|
||||
####################################################################################################
|
||||
# GMC Test
|
||||
####################################################################################################
|
||||
test-gmc-pipeline:
|
||||
needs: [build-images]
|
||||
if: ${{ fromJSON(inputs.test_gmc) }}
|
||||
uses: ./.github/workflows/_gmc-e2e.yml
|
||||
with:
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
secrets: inherit
|
||||
@@ -51,7 +51,10 @@ jobs:
|
||||
run: |
|
||||
set -xe
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
base_commit=${{ github.event.pull_request.base.sha }}
|
||||
LATEST_COMMIT_SHA=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||
"https://api.github.com/repos/opea-project/GenAIExamples/commits?sha=main" | jq -r '.[0].sha')
|
||||
echo "Latest commit SHA is $LATEST_COMMIT_SHA"
|
||||
base_commit=$LATEST_COMMIT_SHA
|
||||
else
|
||||
base_commit=$(git rev-parse HEAD~1) # push event
|
||||
fi
|
||||
@@ -1,57 +1,51 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with GMC
|
||||
# This workflow will only test GMC pipeline and will not install GMC any more
|
||||
name: Single GMC E2e Test For CD Workflow Call
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/**"
|
||||
- "**/tests/test_gmc**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- "!**/kubernetes/manifests/**"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
workflow_call:
|
||||
inputs:
|
||||
example:
|
||||
default: "ChatQnA"
|
||||
description: "The example to test on K8s"
|
||||
required: true
|
||||
type: string
|
||||
hardware:
|
||||
default: "xeon"
|
||||
description: "Nodes to run the test, xeon or gaudi"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/reuse-get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
|
||||
xeon_server_label: 'xeon'
|
||||
gaudi_server_label: 'gaudi'
|
||||
|
||||
gmc-test:
|
||||
needs: [job1]
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
runs-on: "k8s-${{ matrix.hardware }}"
|
||||
runs-on: "k8s-${{ inputs.hardware }}"
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: E2e test gmc
|
||||
run: |
|
||||
echo "Matrix - gmc: ${{ matrix.example }}"
|
||||
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get checkout ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||
else
|
||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||
fi
|
||||
echo "checkout ref ${{ env.CHECKOUT_REF }}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: "refs/pull/${{ github.event.number }}/merge"
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set variables
|
||||
run: |
|
||||
if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
|
||||
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
|
||||
lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
|
||||
echo "APP_NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
|
||||
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
|
||||
echo "APP_NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
|
||||
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
|
||||
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
|
||||
echo "continue_test=true" >> $GITHUB_ENV
|
||||
@@ -65,16 +59,16 @@ jobs:
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
run: |
|
||||
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh ]]; then
|
||||
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh ]]; then
|
||||
echo "No test script found, exist test!"
|
||||
exit 0
|
||||
else
|
||||
echo "should_cleanup=true" >> $GITHUB_ENV
|
||||
${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh install_${{ matrix.example }}
|
||||
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh install_${{ inputs.example }}
|
||||
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
|
||||
if kubectl rollout status deployment --namespace "$APP_NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
|
||||
echo "Testing gmc ${{ matrix.example }}, running validation test..."
|
||||
${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }}
|
||||
echo "Testing gmc ${{ inputs.example }}, running validation test..."
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }}
|
||||
else
|
||||
echo "Timeout waiting for pods in namespace $APP_NAMESPACE to be ready!"
|
||||
exit 1
|
||||
146
.github/workflows/_gmc-workflow.yml
vendored
Normal file
146
.github/workflows/_gmc-workflow.yml
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build and deploy GMC system on call and manual
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
default: "latest"
|
||||
required: true
|
||||
type: string
|
||||
description: "Tag to apply to images"
|
||||
node:
|
||||
default: "xeon"
|
||||
required: true
|
||||
type: string
|
||||
description: "Hardware to run test"
|
||||
opea_branch:
|
||||
default: "main"
|
||||
required: false
|
||||
type: string
|
||||
description: 'OPEA branch for image build'
|
||||
workflow_call:
|
||||
inputs:
|
||||
tag:
|
||||
default: "latest"
|
||||
required: true
|
||||
type: string
|
||||
description: "Tag to apply to images"
|
||||
node:
|
||||
default: "xeon"
|
||||
required: true
|
||||
type: string
|
||||
description: "Hardware to run test"
|
||||
opea_branch:
|
||||
default: "main"
|
||||
required: false
|
||||
type: string
|
||||
description: 'OPEA branch for image build'
|
||||
|
||||
jobs:
|
||||
####################################################################################################
|
||||
# Image Build and Scan
|
||||
####################################################################################################
|
||||
image-build:
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
steps:
|
||||
- name: Checkout GenAIInfra repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: opea-project/GenAIInfra
|
||||
ref: ${{ inputs.opea_branch }}
|
||||
path: GenAIInfra
|
||||
|
||||
- name: Set variables
|
||||
id: set_variables
|
||||
run: |
|
||||
echo "DOCKER_REGISTRY=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
|
||||
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_OUTPUT
|
||||
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_ENV
|
||||
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Build image and push
|
||||
run: |
|
||||
cd ${{github.workspace}}/GenAIInfra/microservices-connector
|
||||
make docker.build
|
||||
make docker.push
|
||||
|
||||
- name: Scan gmcmanager
|
||||
if: ${{ inputs.node == 'gaudi' }}
|
||||
uses: opea-project/validation/actions/trivy-scan@main
|
||||
with:
|
||||
image-ref: ${{ env.DOCKER_REGISTRY }}/gmcmanager:${{ env.VERSION }}
|
||||
output: gmcmanager-scan.txt
|
||||
|
||||
- name: Upload gmcmanager scan result
|
||||
if: ${{ inputs.node == 'gaudi' }}
|
||||
uses: actions/upload-artifact@v4.3.4
|
||||
with:
|
||||
name: gmcmanager-scan
|
||||
path: gmcmanager-scan.txt
|
||||
overwrite: true
|
||||
|
||||
- name: Scan gmcrouter
|
||||
if: ${{ inputs.node == 'gaudi' }}
|
||||
uses: opea-project/validation/actions/trivy-scan@main
|
||||
with:
|
||||
image-ref: ${{ env.DOCKER_REGISTRY }}/gmcrouter:${{ env.VERSION }}
|
||||
output: gmcrouter-scan.txt
|
||||
|
||||
- name: Upload gmcrouter scan result
|
||||
if: ${{ inputs.node == 'gaudi' }}
|
||||
uses: actions/upload-artifact@v4.3.4
|
||||
with:
|
||||
name: gmcrouter-scan
|
||||
path: gmcrouter-scan.txt
|
||||
overwrite: true
|
||||
|
||||
- name: Clean up images
|
||||
if: always()
|
||||
run: |
|
||||
docker rmi ${{ env.DOCKER_REGISTRY }}/gmcrouter:${{ env.VERSION }}
|
||||
docker rmi ${{ env.DOCKER_REGISTRY }}/gmcmanager:${{ env.VERSION }}
|
||||
|
||||
- name: Clean up GenAIInfra source codes
|
||||
if: always()
|
||||
run: |
|
||||
rm -rf ${{github.workspace}}/GenAIInfra
|
||||
|
||||
####################################################################################################
|
||||
# GMC Install
|
||||
####################################################################################################
|
||||
gmc-install:
|
||||
needs: image-build
|
||||
runs-on: "k8s-${{ inputs.node }}"
|
||||
steps:
|
||||
- name: Checkout GenAIInfra repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: opea-project/GenAIInfra
|
||||
ref: ${{ inputs.opea_branch }}
|
||||
path: GenAIInfra
|
||||
|
||||
- name: Set variables
|
||||
run: |
|
||||
echo "SYSTEM_NAMESPACE=opea-system" >> $GITHUB_ENV
|
||||
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_ENV
|
||||
echo "SET_VERSION=true" >> $GITHUB_ENV # to change the tag of microservice images
|
||||
|
||||
- name: Cleanup existing GMC
|
||||
run: |
|
||||
cd GenAIInfra
|
||||
.github/workflows/scripts/e2e/gmc_install.sh cleanup_gmc
|
||||
cd ..
|
||||
|
||||
- name: Install GMC
|
||||
run: |
|
||||
cd GenAIInfra
|
||||
.github/workflows/scripts/e2e/gmc_install.sh install_gmc
|
||||
cd ..
|
||||
|
||||
- name: Clean up GenAIInfra source codes
|
||||
if: always()
|
||||
run: |
|
||||
rm -rf ${{github.workspace}}/GenAIInfra
|
||||
@@ -34,6 +34,10 @@ jobs:
|
||||
image_repo: ${{ steps.build-megaservice-image.outputs.image_repo }}
|
||||
image_tag: ${{ steps.build-megaservice-image.outputs.image_tag }}
|
||||
steps:
|
||||
- name: Clean up Working Directory
|
||||
run: |
|
||||
sudo rm -rf ${{github.workspace}}/* || true
|
||||
|
||||
- name: Get checkout ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
@@ -62,3 +66,4 @@ jobs:
|
||||
fi
|
||||
echo "IMAGE_TAG=${IMAGE_TAG}"
|
||||
echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
|
||||
echo "image_repo=${IMAGE_REPO}" >> $GITHUB_OUTPUT
|
||||
105
.github/workflows/_manifest-e2e.yml
vendored
Normal file
105
.github/workflows/_manifest-e2e.yml
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Single Kubernetes Manifest E2e Test For Call
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
example:
|
||||
default: "ChatQnA"
|
||||
description: "The example to test on K8s"
|
||||
required: true
|
||||
type: string
|
||||
hardware:
|
||||
default: "xeon"
|
||||
description: "Nodes to run the test, xeon or gaudi"
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images, default is latest"
|
||||
required: false
|
||||
type: string
|
||||
context:
|
||||
default: "CI"
|
||||
description: "CI or CD"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
manifest-test:
|
||||
runs-on: "k8s-${{ inputs.hardware }}"
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get checkout ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||
else
|
||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||
fi
|
||||
echo "checkout ref ${{ env.CHECKOUT_REF }}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set variables
|
||||
run: |
|
||||
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
|
||||
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
|
||||
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
|
||||
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
|
||||
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
|
||||
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
|
||||
echo "continue_test=true" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
echo "skip_validate=true" >> $GITHUB_ENV
|
||||
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
|
||||
echo "NAMESPACE=$NAMESPACE"
|
||||
|
||||
- name: Kubectl install
|
||||
id: install
|
||||
run: |
|
||||
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh ]]; then
|
||||
echo "No test script found, exist test!"
|
||||
exit 0
|
||||
else
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh init_${{ inputs.example }}
|
||||
echo "should_cleanup=true" >> $GITHUB_ENV
|
||||
kubectl create ns $NAMESPACE
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh install_${{ inputs.example }} $NAMESPACE
|
||||
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
|
||||
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
|
||||
echo "Testing manifests ${{ inputs.example }}, waiting for pod ready done!"
|
||||
echo "skip_validate=false" >> $GITHUB_ENV
|
||||
else
|
||||
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
|
||||
exit 1
|
||||
fi
|
||||
sleep 60
|
||||
fi
|
||||
|
||||
- name: Validate e2e test
|
||||
if: always()
|
||||
run: |
|
||||
if $skip_validate; then
|
||||
echo "Skip validate"
|
||||
else
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE
|
||||
fi
|
||||
|
||||
- name: Kubectl uninstall
|
||||
if: always()
|
||||
run: |
|
||||
if $should_cleanup; then
|
||||
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
|
||||
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
|
||||
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
|
||||
fi
|
||||
fi
|
||||
117
.github/workflows/_run-docker-compose.yml
vendored
Normal file
117
.github/workflows/_run-docker-compose.yml
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Image Build
|
||||
permissions: read-all
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
registry:
|
||||
description: Container Registry URL
|
||||
required: false
|
||||
default: ""
|
||||
type: string
|
||||
tag:
|
||||
description: Container Tag
|
||||
required: false
|
||||
default: "latest"
|
||||
type: string
|
||||
example:
|
||||
description: Example to test
|
||||
required: true
|
||||
type: string
|
||||
hardware:
|
||||
description: Hardware to run the test on
|
||||
required: true
|
||||
type: string
|
||||
jobs:
|
||||
get-test-case:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
test_cases: ${{ steps.test-case-matrix.outputs.test_cases }}
|
||||
CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
steps:
|
||||
- name: Get checkout ref
|
||||
id: get-checkout-ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
|
||||
else
|
||||
CHECKOUT_REF=${{ github.ref }}
|
||||
fi
|
||||
echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
|
||||
echo "checkout ref ${CHECKOUT_REF}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get test matrix
|
||||
shell: bash
|
||||
id: test-case-matrix
|
||||
run: |
|
||||
set -x
|
||||
example_l=$(echo ${{ inputs.example }} | tr '[:upper:]' '[:lower:]')
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/tests
|
||||
test_cases=$(find . -type f -name "test_${example_l}*on_${{ inputs.hardware }}.sh" -print | cut -d/ -f2 | jq -R '.' | jq -sc '.')
|
||||
echo "test_cases=$test_cases" >> $GITHUB_OUTPUT
|
||||
|
||||
run-test:
|
||||
needs: [get-test-case]
|
||||
strategy:
|
||||
matrix:
|
||||
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
|
||||
fail-fast: false
|
||||
runs-on: ${{ inputs.hardware }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Clean up Working Directory
|
||||
run: |
|
||||
sudo rm -rf ${{github.workspace}}/* || true
|
||||
docker system prune -f
|
||||
docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
|
||||
docker rmi $(docker images --filter reference="*/*:ci" -q) || true
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ needs.get-test-case.outputs.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Run test
|
||||
shell: bash
|
||||
env:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
|
||||
IMAGE_REPO: ${{ inputs.registry }}
|
||||
IMAGE_TAG: ${{ inputs.tag }}
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.hardware }}
|
||||
test_case: ${{ matrix.test_case }}
|
||||
run: |
|
||||
cd ${{ github.workspace }}/$example/tests
|
||||
if [[ "$IMAGE_REPO" == "" ]]; then export IMAGE_REPO="${OPEA_IMAGE_REPO}opea"; fi
|
||||
if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi
|
||||
|
||||
- name: Clean up container
|
||||
shell: bash
|
||||
if: cancelled() || failure()
|
||||
run: |
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker/${{ inputs.hardware }}
|
||||
yaml_files=$(find . -type f -name "*compose*yaml")
|
||||
for file in $yaml_files; do
|
||||
docker compose -f ${file} stop && docker compose -f ${file} rm -f || true
|
||||
done
|
||||
docker system prune -f
|
||||
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
|
||||
|
||||
- name: Publish pipeline artifact
|
||||
if: ${{ !cancelled() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.test_case }}
|
||||
path: ${{ github.workspace }}/${{ inputs.example }}/tests/*.log
|
||||
91
.github/workflows/docker-compose-e2e.yml
vendored
91
.github/workflows/docker-compose-e2e.yml
vendored
@@ -1,91 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with docker compose
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/docker/**"
|
||||
- "**/tests/**"
|
||||
- "**/ui/**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- .github/workflows/docker-compose-e2e.yml
|
||||
workflow_dispatch:
|
||||
|
||||
# If there is a new commit, the previous jobs will be canceled
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/reuse-get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|README.md|*.txt|deprecate|kubernetes|manifest|gmc|assets'
|
||||
|
||||
mega-image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
uses: ./.github/workflows/reuse-image-build.yml
|
||||
with:
|
||||
image_tag: ${{ github.event.pull_request.head.sha }}
|
||||
mega_service: "${{ matrix.example }}"
|
||||
runner_label: "docker-build-${{ matrix.hardware }}"
|
||||
|
||||
Example-test:
|
||||
needs: [job1, mega-image-build]
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
runs-on: ${{ matrix.hardware }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Test example
|
||||
run: |
|
||||
echo "Matrix - example ${{ matrix.example }}, hardware ${{ matrix.hardware }}"
|
||||
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: "refs/pull/${{ github.event.number }}/merge"
|
||||
|
||||
- name: Run test
|
||||
env:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
IMAGE_TAG: ${{ needs.mega-image-build.outputs.image_tag }}
|
||||
IMAGE_REPO_GAUDI: ${{ vars.IMAGE_REPO_GAUDI }}
|
||||
IMAGE_REPO_XEON: ${{ vars.IMAGE_REPO_XEON }}
|
||||
run: |
|
||||
cd ${{ github.workspace }}/$example/tests
|
||||
if [ "$hardware" == "gaudi" ]; then IMAGE_REPO=$IMAGE_REPO_GAUDI; else IMAGE_REPO=$IMAGE_REPO_XEON; fi
|
||||
export IMAGE_REPO=${IMAGE_REPO}
|
||||
example_l=$(echo $example | tr '[:upper:]' '[:lower:]')
|
||||
if [ -f test_${example_l}_on_${hardware}.sh ]; then timeout 30m bash test_${example_l}_on_${hardware}.sh; else echo "Test script not found, skip test!"; fi
|
||||
|
||||
- name: Clean up container
|
||||
env:
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
if: cancelled() || failure()
|
||||
run: |
|
||||
cd ${{ github.workspace }}/$example/docker/$hardware
|
||||
docker compose stop && docker compose rm -f
|
||||
echo y | docker system prune
|
||||
|
||||
- name: Publish pipeline artifact
|
||||
if: ${{ !cancelled() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.example }}-${{ matrix.hardware }}
|
||||
path: ${{ github.workspace }}/${{ matrix.example }}/tests/*.log
|
||||
@@ -4,7 +4,7 @@
|
||||
ARG UBUNTU_VER=22.04
|
||||
FROM ubuntu:${UBUNTU_VER} as devel
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LANG=C.UTF-8
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
|
||||
aspell \
|
||||
|
||||
10
.github/workflows/docker/compose/AudioQnA-compose.yaml
vendored
Normal file
10
.github/workflows/docker/compose/AudioQnA-compose.yaml
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
audioqna:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
20
.github/workflows/docker/compose/ChatQnA-compose.yaml
vendored
Normal file
20
.github/workflows/docker/compose/ChatQnA-compose.yaml
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
chatqna:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
chatqna-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
chatqna-conversation-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile.react
|
||||
image: ${REGISTRY:-opea}/chatqna-conversation-ui:${TAG:-latest}
|
||||
20
.github/workflows/docker/compose/CodeGen-compose.yaml
vendored
Normal file
20
.github/workflows/docker/compose/CodeGen-compose.yaml
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
codegen:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
|
||||
codegen-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest}
|
||||
# React UI build for CodeGen, built from docker/ui with Dockerfile.react.
# The image is named after the service key, as for every other service in
# these compose files; it was previously tagged `codegen-conversation-ui`,
# so anything deriving the image name from the service key could not find it.
codegen-react-ui:
  build:
    context: docker/ui
    dockerfile: ./docker/Dockerfile.react
  image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest}
|
||||
15
.github/workflows/docker/compose/CodeTrans-compose.yaml
vendored
Normal file
15
.github/workflows/docker/compose/CodeTrans-compose.yaml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
codetrans:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
|
||||
codetrans-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}
|
||||
20
.github/workflows/docker/compose/DocSum-compose.yaml
vendored
Normal file
20
.github/workflows/docker/compose/DocSum-compose.yaml
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
docsum:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
||||
docsum-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/docsum-ui:${TAG:-latest}
|
||||
docsum-react-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile.react
|
||||
image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest}
|
||||
20
.github/workflows/docker/compose/FaqGen-compose.yaml
vendored
Normal file
20
.github/workflows/docker/compose/FaqGen-compose.yaml
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
faqgen:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
|
||||
faqgen-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/faqgen-ui:${TAG:-latest}
|
||||
faqgen-react-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile.react
|
||||
image: ${REGISTRY:-opea}/faqgen-react-ui:${TAG:-latest}
|
||||
15
.github/workflows/docker/compose/SearchQnA-compose.yaml
vendored
Normal file
15
.github/workflows/docker/compose/SearchQnA-compose.yaml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
searchqna:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
|
||||
searchqna-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}
|
||||
15
.github/workflows/docker/compose/Translation-compose.yaml
vendored
Normal file
15
.github/workflows/docker/compose/Translation-compose.yaml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# this file should be run in the root of the repo
|
||||
services:
|
||||
translation:
|
||||
build:
|
||||
context: docker
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
|
||||
translation-ui:
|
||||
build:
|
||||
context: docker/ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
image: ${REGISTRY:-opea}/translation-ui:${TAG:-latest}
|
||||
33
.github/workflows/image-build-on-push.yml
vendored
33
.github/workflows/image-build-on-push.yml
vendored
@@ -1,33 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Test
|
||||
name: Build latest images on push event
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ 'main' ]
|
||||
paths:
|
||||
- "**/docker/*.py"
|
||||
- "**/docker/Dockerfile"
|
||||
- "**/docker/ui/**"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-on-push
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/reuse-get-test-matrix.yml
|
||||
|
||||
mega-image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix:
|
||||
workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
|
||||
hardware: ["gaudi","xeon"]
|
||||
uses: ./.github/workflows/reuse-image-build.yml
|
||||
with:
|
||||
image_tag: latest
|
||||
mega_service: "${{ matrix.workload }}"
|
||||
runner_label: docker-build-${{ matrix.hardware }}
|
||||
111
.github/workflows/manifest-e2e.yml
vendored
111
.github/workflows/manifest-e2e.yml
vendored
@@ -1,111 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with manifests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/manifests/**"
|
||||
- "**/tests/test_manifest**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/reuse-get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
|
||||
xeon_server_label: 'xeon'
|
||||
gaudi_server_label: 'gaudi'
|
||||
|
||||
mega-image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
uses: ./.github/workflows/reuse-image-build.yml
|
||||
with:
|
||||
image_tag: ${{ github.event.pull_request.head.sha }}
|
||||
mega_service: "${{ matrix.example }}"
|
||||
runner_label: "docker-build-${{ matrix.hardware }}"
|
||||
|
||||
manifest-test:
|
||||
needs: [job1, mega-image-build]
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
runs-on: "k8s-${{ matrix.hardware }}"
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: E2e test manifest
|
||||
run: |
|
||||
echo "Matrix - manifest: ${{ matrix.example }}"
|
||||
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# Exports the settings that the following install / validate / uninstall steps
# read from the job environment (via GITHUB_ENV).
- name: Set variables
  run: |
    # NOTE(review): this IMAGE_REPO is local to the step and never exported;
    # the value written to GITHUB_ENV below comes from OPEA_IMAGE_REPO.
    # Confirm which of the two is intended.
    if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
    echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
    echo "IMAGE_TAG=${{needs.mega-image-build.outputs.image_tag}}" >> $GITHUB_ENV
    lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
    # Keep the namespace in a shell variable as well: entries appended to
    # GITHUB_ENV only become visible in later steps, so the log line at the
    # end of this script would otherwise print an empty value.
    NAMESPACE="$lower_example-$(date +%Y%m%d%H%M%S)"
    echo "NAMESPACE=$NAMESPACE" >> $GITHUB_ENV
    echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
    echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
    # Flags consumed by the later steps; install flips should_cleanup and
    # skip_validate once it gets far enough.
    echo "continue_test=true" >> $GITHUB_ENV
    echo "should_cleanup=false" >> $GITHUB_ENV
    echo "skip_validate=true" >> $GITHUB_ENV
    echo "NAMESPACE=$NAMESPACE"
|
||||
|
||||
- name: Kubectl install
|
||||
id: install
|
||||
run: |
|
||||
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh ]]; then
|
||||
echo "No test script found, exist test!"
|
||||
exit 0
|
||||
else
|
||||
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh init_${{ matrix.example }}
|
||||
echo "should_cleanup=true" >> $GITHUB_ENV
|
||||
kubectl create ns $NAMESPACE
|
||||
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh install_${{ matrix.example }} $NAMESPACE
|
||||
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
|
||||
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
|
||||
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
|
||||
echo "skip_validate=false" >> $GITHUB_ENV
|
||||
else
|
||||
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
|
||||
exit 1
|
||||
fi
|
||||
sleep 60
|
||||
fi
|
||||
|
||||
- name: Validate e2e test
|
||||
if: always()
|
||||
run: |
|
||||
if $skip_validate; then
|
||||
echo "Skip validate"
|
||||
else
|
||||
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }} $NAMESPACE
|
||||
fi
|
||||
|
||||
- name: Kubectl uninstall
|
||||
if: always()
|
||||
run: |
|
||||
if $should_cleanup; then
|
||||
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
|
||||
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
|
||||
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
|
||||
fi
|
||||
fi
|
||||
86
.github/workflows/manual-bom-scan.yml
vendored
Normal file
86
.github/workflows/manual-bom-scan.yml
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Examples docker images BoM scan on manual event
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
node:
|
||||
default: "gaudi"
|
||||
description: "Hardware to run test"
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "ChatQnA"
|
||||
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
permissions: read-all
|
||||
jobs:
|
||||
get-image-list:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.scan-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set Matrix
|
||||
id: scan-matrix
|
||||
run: |
|
||||
pip install yq
|
||||
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
|
||||
image_list=[]
|
||||
for example in ${examples[@]}
|
||||
do
|
||||
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
|
||||
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
|
||||
done
|
||||
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT
|
||||
|
||||
scan-license:
|
||||
needs: get-image-list
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
strategy:
|
||||
matrix:
|
||||
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Pull Image
|
||||
run: |
|
||||
docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
|
||||
|
||||
- name: SBOM Scan Container
|
||||
uses: anchore/sbom-action@v0.17.1
|
||||
with:
|
||||
image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
output-file: ${{ matrix.image }}-sbom-scan.txt
|
||||
format: 'spdx-json'
|
||||
|
||||
- name: Security Scan Container
|
||||
uses: aquasecurity/trivy-action@0.24.0
|
||||
with:
|
||||
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
output: ${{ matrix.image }}-trivy-scan.txt
|
||||
format: 'table'
|
||||
exit-code: '1'
|
||||
ignore-unfixed: true
|
||||
vuln-type: 'os,library'
|
||||
severity: 'CRITICAL,HIGH'
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
|
||||
|
||||
# Upload the SBOM and Trivy reports for this image.
# `if: always()` is required because the Trivy step in this job is configured
# with exit-code '1': when it reports findings the step fails, and without
# this condition the upload would be skipped and the reports lost.
- uses: actions/upload-artifact@v4.3.4
  if: always()
  with:
    name: ${{ matrix.image }}-scan
    path: ${{ matrix.image }}-*-scan.txt
    overwrite: true
|
||||
68
.github/workflows/manual-docker-publish.yml
vendored
Normal file
68
.github/workflows/manual-docker-publish.yml
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Examples publish docker image on manual event
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
nodes:
|
||||
default: "gaudi"
|
||||
description: "Hardware to run test"
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "ChatQnA"
|
||||
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
publish:
|
||||
default: false
|
||||
description: 'Publish images to docker hub'
|
||||
required: false
|
||||
type: boolean
|
||||
publish_tags:
|
||||
default: "latest,v1.0"
|
||||
description: 'Tag list apply to publish images'
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions: read-all
|
||||
jobs:
|
||||
# Builds the JSON array used as the publish matrix: for every requested
# example it reads <example>/docker/docker_build_compose.yaml and collects the
# keys found under each top-level entry (presumably the service names, which
# double as image names; confirm against the compose files).
get-image-list:
  # The dispatch input is declared as `nodes`; the previously referenced
  # `inputs.node` does not exist and expanded to an empty runner label.
  runs-on: ${{ inputs.nodes }}
  outputs:
    matrix: ${{ steps.scan-matrix.outputs.matrix }}
  steps:
    - name: Checkout out Repo
      uses: actions/checkout@v4

    - name: Set Matrix
      id: scan-matrix
      # NOTE(review): relies on `yq` and `jq` already being installed on the runner.
      run: |
        examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
        # JSON text is quoted everywhere so the shell never treats the
        # brackets as a glob pattern.
        image_list='[]'
        for example in "${examples[@]}"
        do
          images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
          # Merge this example's names into the running list, dropping duplicates.
          image_list=$(echo "${image_list}" | jq -s '.[0] + .[1] | unique' - <(echo "${images}"))
        done
        echo "matrix=$(echo "${image_list}" | jq -c '.')" >> $GITHUB_OUTPUT
|
||||
|
||||
# Runs the image-publish action once per image name produced by
# get-image-list, passing the locally tagged image and the tags to publish.
publish:
  needs: [get-image-list]
  strategy:
    matrix:
      image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
  # Uses the declared `nodes` input; the undeclared `inputs.node` expanded to
  # an empty string and produced the runner label "docker-build-".
  runs-on: "docker-build-${{ inputs.nodes }}"
  steps:
    - name: Image Publish
      uses: opea-project/validation/actions/image-publish@main
      with:
        # ${OPEA_IMAGE_REPO} is passed through literally; presumably it is
        # expanded by the action's shell on the runner. TODO confirm.
        local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
        image_name: opea/${{ matrix.image }}
        publish_tags: ${{ inputs.publish_tags }}
|
||||
110
.github/workflows/manual-example-workflow.yml
vendored
Normal file
110
.github/workflows/manual-example-workflow.yml
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Examples CD workflow on manual event
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
nodes:
|
||||
default: "gaudi,xeon"
|
||||
description: "Hardware to run test"
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "ChatQnA"
|
||||
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
deploy_gmc:
|
||||
default: false
|
||||
description: 'Whether to deploy gmc'
|
||||
required: true
|
||||
type: boolean
|
||||
build:
|
||||
default: true
|
||||
description: 'Build test required images for Examples'
|
||||
required: false
|
||||
type: boolean
|
||||
scan:
|
||||
default: true
|
||||
description: 'Scan all images with Trivy'
|
||||
required: false
|
||||
type: boolean
|
||||
test_compose:
|
||||
default: true
|
||||
description: 'Test examples with docker compose'
|
||||
required: false
|
||||
type: boolean
|
||||
test_k8s:
|
||||
default: false
|
||||
description: 'Test examples with k8s'
|
||||
required: false
|
||||
type: boolean
|
||||
test_gmc:
|
||||
default: false
|
||||
description: 'Test examples with gmc'
|
||||
required: false
|
||||
type: boolean
|
||||
opea_branch:
|
||||
default: "main"
|
||||
description: 'OPEA branch for image build'
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions: read-all
|
||||
jobs:
|
||||
get-test-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
examples: ${{ steps.get-matrix.outputs.examples }}
|
||||
nodes: ${{ steps.get-matrix.outputs.nodes }}
|
||||
steps:
|
||||
- name: Create Matrix
|
||||
id: get-matrix
|
||||
run: |
|
||||
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
|
||||
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "examples=$examples_json" >> $GITHUB_OUTPUT
|
||||
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
|
||||
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
|
||||
|
||||
build-deploy-gmc:
|
||||
needs: [get-test-matrix]
|
||||
if: ${{ fromJSON(inputs.deploy_gmc) }}
|
||||
strategy:
|
||||
matrix:
|
||||
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_gmc-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
tag: ${{ inputs.tag }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
secrets: inherit
|
||||
|
||||
run-examples:
|
||||
needs: [get-test-matrix, build-deploy-gmc]
|
||||
if: always()
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
|
||||
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
example: ${{ matrix.example }}
|
||||
tag: ${{ inputs.tag }}
|
||||
build: ${{ fromJSON(inputs.build) }}
|
||||
scan: ${{ fromJSON(inputs.scan) }}
|
||||
test_compose: ${{ fromJSON(inputs.test_compose) }}
|
||||
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
|
||||
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
secrets: inherit
|
||||
43
.github/workflows/manual-freeze-images.yml
vendored
Normal file
43
.github/workflows/manual-freeze-images.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Freeze base images and 3rd party images on manual event
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
freeze-images:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.ref }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: install skopeo
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt -y install skopeo
|
||||
|
||||
- name: Set up Git
|
||||
run: |
|
||||
git config --global user.name "NeuralChatBot"
|
||||
git config --global user.email "grp_neural_chat_bot@intel.com"
|
||||
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
|
||||
|
||||
- name: Run script
|
||||
run: |
|
||||
bash .github/workflows/scripts/freeze_images.sh
|
||||
|
||||
- name: Commit changes
|
||||
run: |
|
||||
git add .
|
||||
git commit -s -m "Freeze third party images tag"
|
||||
git push
|
||||
46
.github/workflows/manual-freeze-tag.yml
vendored
Normal file
46
.github/workflows/manual-freeze-tag.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Freeze OPEA images release tag in readme on manual event
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
freeze-tag:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.ref }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: Set up Git
|
||||
run: |
|
||||
git config --global user.name "NeuralChatBot"
|
||||
git config --global user.email "grp_neural_chat_bot@intel.com"
|
||||
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
|
||||
|
||||
- name: Run script
|
||||
run: |
|
||||
find . -name "*.md" | xargs sed -i "s|^docker\ compose|TAG=${{ github.event.inputs.tag }}\ docker\ compose|g"
|
||||
find . -type f -name "*.yaml" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
|
||||
find . -type f -name "*.md" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
|
||||
|
||||
- name: Commit changes
|
||||
run: |
|
||||
git add .
|
||||
git commit -s -m "Freeze OPEA images tag"
|
||||
git push
|
||||
78
.github/workflows/manual-image-build.yml
vendored
Normal file
78
.github/workflows/manual-image-build.yml
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build latest images on manual event
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
registry:
|
||||
default: ""
|
||||
description: "Registry to store images,e.g., docker.io, default is empty"
|
||||
required: false
|
||||
type: string
|
||||
services:
|
||||
default: "AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation"
|
||||
description: "List of examples to build"
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
nodes:
|
||||
default: "docker-build-xeon,docker-build-gaudi"
|
||||
description: "List of node to run the build on"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
get-build-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
services: ${{ steps.get-services.outputs.services }}
|
||||
nodes: ${{ steps.get-services.outputs.nodes }}
|
||||
steps:
|
||||
- name: Get test Services
|
||||
id: get-services
|
||||
run: |
|
||||
set -x
|
||||
service_list=($(echo ${{ github.event.inputs.services }} | tr ',' ' '))
|
||||
services=$(printf '%s\n' "${service_list[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "services=$services" >> $GITHUB_OUTPUT
|
||||
node_list=($(echo ${{ github.event.inputs.nodes }} | tr ',' ' '))
|
||||
nodes=$(printf '%s\n' "${node_list[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "nodes=$nodes" >> $GITHUB_OUTPUT
|
||||
|
||||
image-build:
|
||||
needs: get-build-matrix
|
||||
strategy:
|
||||
matrix:
|
||||
service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }}
|
||||
node: ${{ fromJSON(needs.get-build-matrix.outputs.nodes) }}
|
||||
runs-on: ${{ matrix.node }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: |
|
||||
sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Config image repo
|
||||
run: |
|
||||
if [[ -z "${{ github.event.inputs.registry }}" ]]; then
|
||||
echo "image_repo=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "image_repo=${{ github.event.inputs.registry }}/" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Build image
|
||||
uses: opea-project/validation/actions/image-build@main
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ matrix.service }}
|
||||
docker_compose_path: ${{ github.workspace }}/.github/workflows/docker/compose/${{ matrix.service }}-compose.yaml
|
||||
registry: ${{ env.image_repo }}opea
|
||||
tag: ${{ github.event.inputs.tag }}
|
||||
40
.github/workflows/pr-docker-compose-e2e.yml
vendored
Normal file
40
.github/workflows/pr-docker-compose-e2e.yml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with docker compose
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/docker/**"
|
||||
- "**/tests/**"
|
||||
- "**/ui/**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- .github/workflows/pr-docker-compose-e2e.yml
|
||||
|
||||
# If there is a new commit, the previous jobs will be canceled
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
get-test-matrix:
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|README.md|*.txt|deprecate|kubernetes|manifest|gmc|assets'
|
||||
|
||||
example-test:
|
||||
needs: [get-test-matrix]
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_run-docker-compose.yml
|
||||
with:
|
||||
registry: "opea"
|
||||
tag: "ci"
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
secrets: inherit
|
||||
38
.github/workflows/pr-gmc-e2e.yaml
vendored
Normal file
38
.github/workflows/pr-gmc-e2e.yaml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with GMC
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/**"
|
||||
- "**/tests/test_gmc**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- "!**/kubernetes/manifests/**"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
|
||||
xeon_server_label: 'xeon'
|
||||
gaudi_server_label: 'gaudi'
|
||||
|
||||
gmc-test:
|
||||
needs: [job1]
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
uses: ./.github/workflows/_gmc-e2e.yml
|
||||
with:
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
secrets: inherit
|
||||
48
.github/workflows/pr-manifest-e2e.yml
vendored
Normal file
48
.github/workflows/pr-manifest-e2e.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with manifests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/manifests/**"
|
||||
- "**/tests/test_manifest**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
|
||||
xeon_server_label: 'xeon'
|
||||
gaudi_server_label: 'gaudi'
|
||||
|
||||
mega-image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
uses: ./.github/workflows/_image-build.yml
|
||||
with:
|
||||
image_tag: ${{ github.event.pull_request.head.sha }}
|
||||
mega_service: "${{ matrix.example }}"
|
||||
runner_label: "docker-build-${{ matrix.hardware }}"
|
||||
|
||||
manifest-test:
|
||||
needs: [job1, mega-image-build]
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
uses: ./.github/workflows/_manifest-e2e.yml
|
||||
with:
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
tag: ${{ needs.mega-image-build.outputs.image_tag }}
|
||||
secrets: inherit
|
||||
57
.github/workflows/push-image-build.yml
vendored
Normal file
57
.github/workflows/push-image-build.yml
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Test
|
||||
name: Build latest images on push event

# Rebuilds images when docker sources change on main, or on manual dispatch.
on:
  push:
    branches: [ 'main' ]
    paths:
      - "**/docker/*.py"
      - "**/docker/Dockerfile"
      - "**/docker/ui/**"
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-on-push
  cancel-in-progress: true

jobs:
  # Produces `run_matrix`; only the `example` names of its entries are used below.
  job1:
    uses: ./.github/workflows/_get-test-matrix.yml

  mega-image-build:
    needs: job1
    strategy:
      matrix:
        workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
        hardware: ["gaudi","xeon"]
    runs-on: docker-build-${{ matrix.hardware }}
    steps:
      - name: Clean up Working Directory
        run: |
          sudo rm -rf ${{github.workspace}}/*

      - name: Checkout out Repo
        uses: actions/checkout@v4

      # Records in the job environment whether a compose file exists for this
      # workload; the build step below is skipped when it does not.
      - name: Check Docker Compose File Exists
        env:
          service: ${{ matrix.workload }}
        run: |
          docker_compose_path="${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml"
          # Quoted so a path containing whitespace is tested and written as one word.
          if [ -e "$docker_compose_path" ]; then
            echo "file_exists=true" >> "$GITHUB_ENV"
            echo "docker_compose_path=${docker_compose_path}" >> "$GITHUB_ENV"
          else
            echo "file_exists=false" >> "$GITHUB_ENV"
            echo "docker_compose_path=${docker_compose_path} for this service does not exist, so skipping image build for this service!!!"
          fi

      - name: Build Image
        if: env.file_exists == 'true'
        uses: opea-project/validation/actions/image-build@main
        with:
          work_dir: ${{ github.workspace }}/${{ matrix.workload }}
          docker_compose_path: ${{ env.docker_compose_path }}
          # NOTE(review): `${OPEA_IMAGE_REPO}` is passed literally here; presumably the action expands it in a shell - confirm.
          registry: ${OPEA_IMAGE_REPO}opea
|
||||
49
.github/workflows/push-images-path-detection.yml
vendored
Normal file
49
.github/workflows/push-images-path-detection.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Check the validity of links in docker_images_list.

# The `push` event has no activity types, so no `types:` filter is given here
# (the opened/reopened/... values belong to `pull_request`).
on:
  push:
    branches: [main]

jobs:
  check-dockerfile-paths:
    runs-on: ubuntu-latest

    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout repo GenAIExamples
        uses: actions/checkout@v4

      # Requests every `<a href="...">` target found in docker_images_list.md,
      # retries once on a non-200 answer, and fails the step if any link is
      # still not 200 after the retry.
      - name: Check the validity of links
        run: |
          cd ${{github.workspace}}
          miss="FALSE"
          # grep -n emits "<line>:<link>"; IFS=: splits off the line number and
          # leaves the rest (including the colon in "https://") in $link.
          while IFS=: read -r line link; do
            http_status=$(curl -o /dev/null -s -w "%{http_code}" "$link")
            if [ "$http_status" -eq 200 ]; then
              echo "Valid link: $link (Line $line)"
            else
              echo "Broken link: $link (Line $line) (Status $http_status) "
              echo "-----------------retry strat----------------------"
              retry_http_status=$(curl -o /dev/null -s -w "%{http_code}" "$link")
              if [ "$retry_http_status" -eq 200 ]; then
                # Do not reset $miss here: it is a sticky flag, and clearing it
                # would hide a broken link found earlier in the file.
                echo "Valid link: $link (Line $line)"
                echo "---------------Retry is valid---------------------"
              else
                miss="TRUE"
                # Report the status of the retry, not of the first attempt.
                echo "Retry broken link: $link (Line $line) (Status $retry_http_status) "
                echo "-------------Retry is not valid-------------------"
              fi
            fi
          # NOTE(review): relative to the workspace this path points outside the
          # checkout; if the file is missing the loop reads nothing and the step
          # passes - confirm the intended location.
          done < <(grep -n -oP '(?<=a href=")[^"]*(?=">)' ../../docker_images_list.md)

          if [[ "$miss" == "TRUE" ]]; then
            exit 1
          fi
        shell: bash
|
||||
59
.github/workflows/push-infra-issue-creation.yml
vendored
Normal file
59
.github/workflows/push-infra-issue-creation.yml
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Opens an issue in opea-project/GenAIInfra whenever a push changes a
# docker compose.yaml, so helm charts and manifests can be re-checked.
on:
  push:
    branches: [ 'main','issue' ]
    paths:
      - "**/docker/*/compose.yaml"

name: Create an issue to GenAIInfra on push
jobs:
  job1:
    name: Create issue
    runs-on: ubuntu-latest
    steps:
      - name: Checkout out Repo
        uses: actions/checkout@v4
        with:
          # Full history so the pre-push commit can be diffed against.
          fetch-depth: 0

      # Exports two job environment variables for the next step:
      #   examples      - comma-separated top-level directories of changed files
      #   changed_files - Markdown bullet list of the changed compose.yaml paths
      - name: Get changed files
        id: get-changed-files
        run: |
          set -xe
          merged_commit=$(git log -1 --format='%H')
          # A push can carry several commits and the `paths` filter fires if any
          # of them matches, so diff from the pre-push tip rather than HEAD~1.
          # Fall back to the parent commit when that tip is unknown or
          # unreachable (new branch, forced push).
          base_commit="${{ github.event.before }}"
          if ! git cat-file -e "${base_commit}^{commit}" 2>/dev/null; then
            base_commit=$(git rev-parse HEAD~1)
          fi
          changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | \
            grep -E '.*/docker/.*/compose.yaml')" || true

          # changed_files is a plain string, so expand it as a scalar.
          examples=$(printf '%s\n' "$changed_files" | grep '/' | cut -d'/' -f1 | sort -u)
          format_examples=$(echo "$examples" | tr '\n' ',')
          format_examples=${format_examples%,}
          echo "examples=$format_examples" >> $GITHUB_ENV

          format_changed_files=$(echo "$changed_files" | awk '{print "- "$0}')
          # Multi-line value: written with the name<<DELIMITER syntax.
          echo "changed_files<<EOF" >> $GITHUB_ENV
          echo "$format_changed_files" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Create Issue
        uses: daisy-ycguo/create-issue-action@stable
        with:
          token: ${{ secrets.Infra_Issue_Token }}
          owner: opea-project
          repo: GenAIInfra
          title: |
            [ci-auto] GenAIExample ${{ env.examples }} compose.yaml got changed.
          assignees: lianhao
          labels: helm
          body: |
            ## GenAIExample ${{ env.examples }} compose.yaml got changed

            Below files are changed in [this commit](https://github.com/opea-project/GenAIExamples/commit/${{ github.sha }})

            ${{ env.changed_files }}

            Please verify if the helm charts and manifests need to be changed accordingly.

            > This issue was created automatically by CI.
|
||||
@@ -6,12 +6,16 @@
|
||||
source /GenAIExamples/.github/workflows/scripts/change_color
|
||||
log_dir=/GenAIExamples/.github/workflows/scripts/codeScan
|
||||
|
||||
find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 {} \; 2>&1 | tee ${log_dir}/hadolint.log
|
||||
find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 --ignore DL3013 {} \; 2>&1 | tee ${log_dir}/hadolint.log
|
||||
|
||||
if [[ $(grep -c "error" ${log_dir}/hadolint.log) != 0 ]]; then
|
||||
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $(grep -c "warning" ${log_dir}/hadolint.log) != 0 ]]; then
|
||||
$BOLD_RED && echo "Warning!! Please Click on the artifact button to download and check warning details." && $RESET
|
||||
exit 1
|
||||
fi
|
||||
$BOLD_PURPLE && echo "Congratulations, Hadolint check passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
|
||||
exit 0
|
||||
|
||||
50
.github/workflows/scripts/freeze_images.sh
vendored
Normal file
50
.github/workflows/scripts/freeze_images.sh
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Image name as written in the repo's files -> skopeo transport URL used to list its tags.
declare -A dict=(
    ["langchain/langchain"]="docker://docker.io/langchain/langchain"
    ["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference"
)
|
||||
|
||||
# Look up the newest purely numeric tag of an image and pin it in the repo.
#
# Arguments:
#   $1 - image name; must be a key of the global `dict` table
# Effects:
#   Lists the image's tags with skopeo, prints them, picks the highest tag
#   made only of digits, dots and dashes (version sort), and passes it to
#   replace_image_version. The tag may be empty when nothing matches.
function get_latest_version() {
    repo_image=$1
    versions=$(skopeo list-tags "${dict[$repo_image]}" | jq -r '.Tags[]')
    # Tags are data, not a format string: a '%' or '\' in a tag must print literally.
    printf 'version list:\n%s\n' "$versions"
    # $versions is a newline-separated string, so it is expanded as a scalar.
    latest_version=$(printf '%s\n' "$versions" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
    echo "latest version: $latest_version"
    replace_image_version "$repo_image" "$latest_version"
}
|
||||
|
||||
# Rewrite `<image>:latest...` references under the current directory to a fixed tag.
#
# Arguments:
#   $1 - image name as it appears in the files
#   $2 - tag to pin; when empty nothing is changed
# Effects:
#   Edits Dockerfile, *.yaml and *.md files in place. In Dockerfiles the rest
#   of the line after ":latest" is dropped; in YAML and Markdown only a
#   trailing [A-Za-z0-9-] suffix of the tag is replaced.
function replace_image_version() {
    repo_image=$1
    version=$2
    if [[ -z "$version" ]]; then
        echo "version is empty"
    else
        echo "replace $repo_image:latest with $repo_image:$version"
        # `-exec ... {} +` instead of `| xargs`: paths containing whitespace stay
        # intact, and sed is not started at all when no file matches.
        find . -name "Dockerfile" -exec sed -i "s|$repo_image:latest.*|$repo_image:$version|g" {} +
        find . -name "*.yaml" -exec sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g" {} +
        find . -name "*.md" -exec sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g" {} +
    fi
}
|
||||
|
||||
# Stop the script with status 0 when running on the `main` branch;
# on any other branch just report the branch name and continue.
check_branch_name() {
    case "$GITHUB_REF_NAME" in
        main)
            echo "$GITHUB_REF_NAME is protected branch"
            exit 0
            ;;
        *)
            echo "branch name is $GITHUB_REF_NAME"
            ;;
    esac
}
|
||||
|
||||
# Entry point: bail out on the protected branch, then pin every image
# listed in `dict`, wrapping each one in a collapsible log group.
main() {
    check_branch_name
    for repo_image in "${!dict[@]}"
    do
        printf '%s\n' "::group::check $repo_image"
        get_latest_version $repo_image
        printf '%s\n' "::endgroup::"
    done
}
|
||||
|
||||
main
|
||||
106
AgentQnA/README.md
Normal file
106
AgentQnA/README.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# Agents for Question Answering
|
||||
|
||||
## Overview
|
||||
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface with knowledge graphs, SQL databases, external knowledge bases, etc.
|
||||

|
||||
|
||||
### Why Agent for question answering?
|
||||
|
||||
1. Improve relevancy of retrieved context.
|
||||
Agent can rephrase user queries, decompose user queries, and iterate to get the most relevant context for answering user's questions. Compared to conventional RAG, RAG agent can significantly improve the correctness and relevancy of the answer.
|
||||
2. Use tools to get additional knowledge.
|
||||
For example, knowledge graphs and SQL databases can be exposed as APIs for Agents to gather knowledge that may be missing in the retrieval vector database.
|
||||
3. Hierarchical agent can further improve performance.
|
||||
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.
|
||||
|
||||
### Roadmap
|
||||
|
||||
- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
|
||||
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
|
||||
- v1.0 or later: agents use open-source llm backend.
|
||||
- v1.1 or later: add safeguards
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Build agent docker image <br/>
|
||||
First, clone the opea GenAIComps repo
|
||||
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
```
|
||||
|
||||
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
|
||||
|
||||
```
|
||||
cd GenAIComps
|
||||
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
|
||||
```
|
||||
|
||||
2. Launch tool services <br/>
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
|
||||
3. Set up environment for this example <br/>
|
||||
First, clone this repo
|
||||
|
||||
```
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
Second, set up env vars
|
||||
|
||||
```
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# optional: OPENAI_API_KEY
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
4. Launch agent services <br/>
|
||||
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
|
||||
To use openai llm, run command below.
|
||||
|
||||
```
|
||||
cd docker/openai/
|
||||
bash launch_agent_service_openai.sh
|
||||
```
|
||||
|
||||
## Validate services
|
||||
|
||||
First look at logs of the agent docker containers:
|
||||
|
||||
```
|
||||
docker logs docgrader-agent-endpoint
|
||||
```
|
||||
|
||||
```
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
|
||||
|
||||
Second, validate worker agent:
|
||||
|
||||
```
|
||||
curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
Third, validate supervisor agent:
|
||||
|
||||
```
|
||||
curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/minmin-intel/GenAIComps/tree/agent-comp-dev/comps/agent/langchain#-4-provide-your-own-tools).
|
||||
BIN
AgentQnA/assets/agent_qna_arch.png
Normal file
BIN
AgentQnA/assets/agent_qna_arch.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 69 KiB |
63
AgentQnA/docker/openai/docker-compose-agent-openai.yaml
Normal file
63
AgentQnA/docker/openai/docker-compose-agent-openai.yaml
Normal file
@@ -0,0 +1,63 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Two containers from the same agent image: a worker (rag_agent strategy,
# port 9095) and a supervisor (react_langgraph strategy, port 9090).
# Boolean-looking environment values are quoted so they reach the container
# as strings instead of being typed by the YAML parser.
services:
  worker-docgrader-agent:
    image: opea/comps-agent-langchain:latest
    container_name: docgrader-agent-endpoint
    volumes:
      # Mounts the local GenAIComps agent sources over the copy in the image.
      - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
      - ${TOOLSET_PATH}:/home/user/tools/
    ports:
      - "9095:9095"
    ipc: host
    environment:
      ip_address: ${ip_address}
      strategy: rag_agent
      recursion_limit: ${recursion_limit}
      llm_engine: openai
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      model: ${model}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      streaming: "false"
      tools: /home/user/tools/worker_agent_tools.yaml
      require_human_feedback: "false"
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-worker-agent-service"
      port: 9095

  supervisor-react-agent:
    image: opea/comps-agent-langchain:latest
    container_name: react-agent-endpoint
    volumes:
      - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
      - ${TOOLSET_PATH}:/home/user/tools/
    ports:
      - "9090:9090"
    ipc: host
    environment:
      ip_address: ${ip_address}
      strategy: react_langgraph
      recursion_limit: ${recursion_limit}
      llm_engine: openai
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      model: ${model}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      # NOTE(review): the launch script in this directory does not export `streaming` - confirm the intended default.
      streaming: ${streaming}
      tools: /home/user/tools/supervisor_agent_tools.yaml
      require_human_feedback: "false"
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
      CRAG_SERVER: $CRAG_SERVER
      WORKER_AGENT_URL: $WORKER_AGENT_URL
      port: 9090
|
||||
13
AgentQnA/docker/openai/launch_agent_service_openai.sh
Normal file
13
AgentQnA/docker/openai/launch_agent_service_openai.sh
Normal file
@@ -0,0 +1,13 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Export the settings the compose file interpolates, then start both agents
# in the background. The first address reported by `hostname -I` is used as
# this host's IP.
ip_address=$(hostname -I | awk '{print $1}')
export ip_address

# Agent / LLM parameters.
export recursion_limit=12 model="gpt-4o-mini-2024-07-18" temperature=0 max_new_tokens=512
export OPENAI_API_KEY=${OPENAI_API_KEY}

# Endpoints the supervisor agent calls: the worker agent and the CRAG mock API.
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions" CRAG_SERVER=http://${ip_address}:8080

docker compose -f docker-compose-agent-openai.yaml up -d
|
||||
75
AgentQnA/tests/_test_agentqna_on_xeon.sh
Normal file
75
AgentQnA/tests/_test_agentqna_on_xeon.sh
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Abort on the first failing command.
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
# Report only whether the key is present; its value must not be written to the CI log.
if [ -n "${OPENAI_API_KEY}" ]; then
    echo "OPENAI_API_KEY is set"
else
    echo "OPENAI_API_KEY is not set"
fi

# WORKDIR is two levels above the parent of the directory the script is started from.
WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
|
||||
# Build the agent image from a GenAIComps checkout under $WORKDIR,
# cloning the repository first when no checkout is present.
build_agent_docker_image() {
    cd $WORKDIR
    [ -d "GenAIComps" ] || git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps
    echo PWD: $(pwd)
    docker build \
        -t opea/comps-agent-langchain:latest \
        --build-arg https_proxy=$https_proxy \
        --build-arg http_proxy=$http_proxy \
        -f comps/agent/langchain/docker/Dockerfile .
}
|
||||
|
||||
# Start the CRAG mock API container (host port 8080), then launch the
# agent containers through the OpenAI launch script.
start_services() {
    printf '%s\n' "Starting CRAG server"
    docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0

    printf '%s\n' "Starting Agent services"
    cd $WORKDIR/GenAIExamples/AgentQnA/docker/openai
    bash launch_agent_service_openai.sh
}
|
||||
|
||||
# Check that a service response contains an expected pattern.
#
# Arguments:
#   $1 - response body to inspect
#   $2 - pattern handed to `grep -q`
#   $3 - service name used as the log prefix
# Output:
#   stdout - exactly "0" on a match, "1" otherwise, so a caller can capture it
#            with $(validate ...) and compare it against "1"
#   stderr - the human-readable log line; it is kept off stdout because it
#            would otherwise become part of the captured status and the
#            comparison with "1" could never succeed
function validate() {
    local CONTENT="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"

    if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
        echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT" >&2
        echo 0
    else
        echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" >&2
        echo 1
    fi
}
|
||||
|
||||
|
||||
# Send one query to the supervisor agent (port 9090, proxy disabled), dump
# the container log, and exit 1 when the captured validate output is "1".
# The request body is kept exactly as written, hence the unindented lines.
run_tests() {
    echo "----------------Test supervisor agent ----------------"
    local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}')
    local EXIT_CODE=$(validate "$CONTENT" "Taylor" "react-agent-endpoint")
    docker logs react-agent-endpoint
    [ "$EXIT_CODE" != "1" ] || exit 1
}
|
||||
|
||||
# Stop every running container created from the CRAG mock API image and
# from the agent image.
# `xargs -r` skips `docker stop` when no container matches. A bare
# `docker stop` with no arguments fails, which under `set -e` would abort the
# script before the agent containers are stopped.
function stop_services() {
    echo "Stopping CRAG server"
    docker ps -q --filter ancestor=docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0 | xargs -r docker stop
    echo "Stopping Agent services"
    docker ps -q --filter ancestor=opea/comps-agent-langchain:latest | xargs -r docker stop
}
|
||||
|
||||
# Full test cycle: build the image, bring the services up, query the
# supervisor agent, then stop the containers.
main() {
    build_agent_docker_image   # image shared by both agents
    start_services             # CRAG mock API + agent containers
    run_tests                  # exits 1 on a failed check
    stop_services
}
|
||||
|
||||
main
|
||||
330
AgentQnA/tools/pycragapi.py
Normal file
330
AgentQnA/tools/pycragapi.py
Normal file
@@ -0,0 +1,330 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class CRAG(object):
|
||||
"""A client for interacting with the CRAG server, offering methods to query various domains such as Open, Movie, Finance, Music, and Sports. Each method corresponds to an API endpoint on the CRAG server.
|
||||
|
||||
Attributes:
|
||||
server (str): The base URL of the CRAG server. Defaults to "http://127.0.0.1:8080".
|
||||
|
||||
Methods:
|
||||
open_search_entity_by_name(query: str) -> dict: Search for entities by name in the Open domain.
|
||||
open_get_entity(entity: str) -> dict: Retrieve detailed information about an entity in the Open domain.
|
||||
movie_get_person_info(person_name: str) -> dict: Get information about a person related to movies.
|
||||
movie_get_movie_info(movie_name: str) -> dict: Get information about a movie.
|
||||
movie_get_year_info(year: str) -> dict: Get information about movies released in a specific year.
|
||||
movie_get_movie_info_by_id(movie_id: int) -> dict: Get movie information by its unique ID.
|
||||
movie_get_person_info_by_id(person_id: int) -> dict: Get person information by their unique ID.
|
||||
finance_get_company_name(query: str) -> dict: Search for company names in the finance domain.
|
||||
finance_get_ticker_by_name(query: str) -> dict: Retrieve the ticker symbol for a given company name.
|
||||
finance_get_price_history(ticker_name: str) -> dict: Get the price history for a given ticker symbol.
|
||||
finance_get_detailed_price_history(ticker_name: str) -> dict: Get detailed price history for a ticker symbol.
|
||||
finance_get_dividends_history(ticker_name: str) -> dict: Get dividend history for a ticker symbol.
|
||||
finance_get_market_capitalization(ticker_name: str) -> dict: Retrieve market capitalization for a ticker symbol.
|
||||
finance_get_eps(ticker_name: str) -> dict: Get earnings per share (EPS) for a ticker symbol.
|
||||
finance_get_pe_ratio(ticker_name: str) -> dict: Get the price-to-earnings (PE) ratio for a ticker symbol.
|
||||
finance_get_info(ticker_name: str) -> dict: Get financial information for a ticker symbol.
|
||||
music_search_artist_entity_by_name(artist_name: str) -> dict: Search for music artists by name.
|
||||
music_search_song_entity_by_name(song_name: str) -> dict: Search for songs by name.
|
||||
music_get_billboard_rank_date(rank: int, date: str = None) -> dict: Get Billboard ranking for a specific rank and date.
|
||||
music_get_billboard_attributes(date: str, attribute: str, song_name: str) -> dict: Get attributes of a song from Billboard rankings.
|
||||
music_grammy_get_best_artist_by_year(year: int) -> dict: Get the Grammy Best New Artist for a specific year.
|
||||
music_grammy_get_award_count_by_artist(artist_name: str) -> dict: Get the total Grammy awards won by an artist.
|
||||
music_grammy_get_award_count_by_song(song_name: str) -> dict: Get the total Grammy awards won by a song.
|
||||
music_grammy_get_best_song_by_year(year: int) -> dict: Get the Grammy Song of the Year for a specific year.
|
||||
music_grammy_get_award_date_by_artist(artist_name: str) -> dict: Get the years an artist won a Grammy award.
|
||||
music_grammy_get_best_album_by_year(year: int) -> dict: Get the Grammy Album of the Year for a specific year.
|
||||
music_grammy_get_all_awarded_artists() -> dict: Get all artists awarded the Grammy Best New Artist.
|
||||
music_get_artist_birth_place(artist_name: str) -> dict: Get the birthplace of an artist.
|
||||
music_get_artist_birth_date(artist_name: str) -> dict: Get the birth date of an artist.
|
||||
music_get_members(band_name: str) -> dict: Get the member list of a band.
|
||||
music_get_lifespan(artist_name: str) -> dict: Get the lifespan of an artist.
|
||||
music_get_song_author(song_name: str) -> dict: Get the author of a song.
|
||||
music_get_song_release_country(song_name: str) -> dict: Get the release country of a song.
|
||||
music_get_song_release_date(song_name: str) -> dict: Get the release date of a song.
|
||||
music_get_artist_all_works(artist_name: str) -> dict: Get all works by an artist.
|
||||
sports_soccer_get_games_on_date(team_name: str, date: str) -> dict: Get soccer games on a specific date.
|
||||
sports_nba_get_games_on_date(team_name: str, date: str) -> dict: Get NBA games on a specific date.
|
||||
sports_nba_get_play_by_play_data_by_game_ids(game_ids: List[str]) -> dict: Get NBA play by play data for a set of game ids.
|
||||
|
||||
Note:
|
||||
Each method performs a POST request to the corresponding API endpoint and returns the response as a JSON dictionary.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.server = os.environ.get("CRAG_SERVER", "http://127.0.0.1:8080")
|
||||
|
||||
def open_search_entity_by_name(self, query: str):
|
||||
url = self.server + "/open/search_entity_by_name"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": query}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def open_get_entity(self, entity: str):
|
||||
url = self.server + "/open/get_entity"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": entity}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def movie_get_person_info(self, person_name: str):
|
||||
url = self.server + "/movie/get_person_info"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": person_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def movie_get_movie_info(self, movie_name: str):
|
||||
url = self.server + "/movie/get_movie_info"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": movie_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def movie_get_year_info(self, year: str):
|
||||
url = self.server + "/movie/get_year_info"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": year}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def movie_get_movie_info_by_id(self, movid_id: int):
|
||||
url = self.server + "/movie/get_movie_info_by_id"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": movid_id}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def movie_get_person_info_by_id(self, person_id: int):
|
||||
url = self.server + "/movie/get_person_info_by_id"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": person_id}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_company_name(self, query: str):
|
||||
url = self.server + "/finance/get_company_name"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": query}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_ticker_by_name(self, query: str):
|
||||
url = self.server + "/finance/get_ticker_by_name"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": query}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_price_history(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_price_history"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_detailed_price_history(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_detailed_price_history"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_dividends_history(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_dividends_history"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_market_capitalization(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_market_capitalization"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_eps(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_eps"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_pe_ratio(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_pe_ratio"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def finance_get_info(self, ticker_name: str):
|
||||
url = self.server + "/finance/get_info"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": ticker_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_search_artist_entity_by_name(self, artist_name: str):
|
||||
url = self.server + "/music/search_artist_entity_by_name"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": artist_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_search_song_entity_by_name(self, song_name: str):
|
||||
url = self.server + "/music/search_song_entity_by_name"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": song_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_get_billboard_rank_date(self, rank: int, date: str = None):
|
||||
url = self.server + "/music/get_billboard_rank_date"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"rank": rank, "date": date}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_get_billboard_attributes(self, date: str, attribute: str, song_name: str):
|
||||
url = self.server + "/music/get_billboard_attributes"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"date": date, "attribute": attribute, "song_name": song_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_grammy_get_best_artist_by_year(self, year: int):
|
||||
url = self.server + "/music/grammy_get_best_artist_by_year"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": year}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_grammy_get_award_count_by_artist(self, artist_name: str):
|
||||
url = self.server + "/music/grammy_get_award_count_by_artist"
|
||||
headers = {"accept": "application/json"}
|
||||
data = {"query": artist_name}
|
||||
result = requests.post(url, json=data, headers=headers)
|
||||
return json.loads(result.text)
|
||||
|
||||
def music_grammy_get_award_count_by_song(self, song_name: str):
    """POST ``song_name`` as ``query`` to ``/music/grammy_get_award_count_by_song``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/grammy_get_award_count_by_song"
    response = requests.post(
        endpoint,
        json={"query": song_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_grammy_get_best_song_by_year(self, year: int):
    """POST ``year`` as ``query`` to ``/music/grammy_get_best_song_by_year``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/grammy_get_best_song_by_year"
    response = requests.post(
        endpoint,
        json={"query": year},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_grammy_get_award_date_by_artist(self, artist_name: str):
    """POST ``artist_name`` as ``query`` to ``/music/grammy_get_award_date_by_artist``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/grammy_get_award_date_by_artist"
    response = requests.post(
        endpoint,
        json={"query": artist_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_grammy_get_best_album_by_year(self, year: int):
    """POST ``year`` as ``query`` to ``/music/grammy_get_best_album_by_year``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/grammy_get_best_album_by_year"
    response = requests.post(
        endpoint,
        json={"query": year},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_grammy_get_all_awarded_artists(self):
    """POST to ``/music/grammy_get_all_awarded_artists`` with no request body.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/grammy_get_all_awarded_artists"
    response = requests.post(endpoint, headers={"accept": "application/json"})
    return json.loads(response.text)
|
||||
|
||||
def music_get_artist_birth_place(self, artist_name: str):
    """POST ``artist_name`` as ``query`` to ``/music/get_artist_birth_place``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_artist_birth_place"
    response = requests.post(
        endpoint,
        json={"query": artist_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_artist_birth_date(self, artist_name: str):
    """POST ``artist_name`` as ``query`` to ``/music/get_artist_birth_date``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_artist_birth_date"
    response = requests.post(
        endpoint,
        json={"query": artist_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_members(self, band_name: str):
    """POST ``band_name`` as ``query`` to ``/music/get_members``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_members"
    response = requests.post(
        endpoint,
        json={"query": band_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_lifespan(self, artist_name: str):
    """POST ``artist_name`` as ``query`` to ``/music/get_lifespan``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_lifespan"
    response = requests.post(
        endpoint,
        json={"query": artist_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_song_author(self, song_name: str):
    """POST ``song_name`` as ``query`` to ``/music/get_song_author``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_song_author"
    response = requests.post(
        endpoint,
        json={"query": song_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_song_release_country(self, song_name: str):
    """POST ``song_name`` as ``query`` to ``/music/get_song_release_country``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_song_release_country"
    response = requests.post(
        endpoint,
        json={"query": song_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_song_release_date(self, song_name: str):
    """POST ``song_name`` as ``query`` to ``/music/get_song_release_date``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_song_release_date"
    response = requests.post(
        endpoint,
        json={"query": song_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def music_get_artist_all_works(self, song_name: str):
    """POST ``song_name`` as ``query`` to ``/music/get_artist_all_works``.

    NOTE(review): the endpoint name suggests the query is an artist name, so
    the ``song_name`` parameter looks misnamed; it is kept unchanged so
    keyword callers keep working. Confirm against the server API.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/music/get_artist_all_works"
    response = requests.post(
        endpoint,
        json={"query": song_name},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
|
||||
def sports_soccer_get_games_on_date(self, date: str, team_name: str = None):
    """POST ``team_name`` and ``date`` to ``/sports/soccer/get_games_on_date``.

    ``team_name`` defaults to ``None`` and is sent as-is in that case.
    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/sports/soccer/get_games_on_date"
    payload = {"team_name": team_name, "date": date}
    response = requests.post(endpoint, json=payload, headers={"accept": "application/json"})
    return json.loads(response.text)
|
||||
|
||||
def sports_nba_get_games_on_date(self, date: str, team_name: str = None):
    """POST ``team_name`` and ``date`` to ``/sports/nba/get_games_on_date``.

    ``team_name`` defaults to ``None`` and is sent as-is in that case.
    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/sports/nba/get_games_on_date"
    payload = {"team_name": team_name, "date": date}
    response = requests.post(endpoint, json=payload, headers={"accept": "application/json"})
    return json.loads(response.text)
|
||||
|
||||
def sports_nba_get_play_by_play_data_by_game_ids(self, game_ids: List[str]):
    """POST ``game_ids`` to ``/sports/nba/get_play_by_play_data_by_game_ids``.

    Returns the response body decoded from JSON.
    """
    endpoint = self.server + "/sports/nba/get_play_by_play_data_by_game_ids"
    response = requests.post(
        endpoint,
        json={"game_ids": game_ids},
        headers={"accept": "application/json"},
    )
    return json.loads(response.text)
|
||||
59
AgentQnA/tools/supervisor_agent_tools.yaml
Normal file
59
AgentQnA/tools/supervisor_agent_tools.yaml
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Tool entry: free-text lookup. callable_api names search_knowledge_base in tools.py.
search_knowledge_base:
  description: Search knowledge base for a given query. Returns text related to the query.
  callable_api: tools.py:search_knowledge_base
  args_schema:
    # Single string argument carrying the search text.
    query:
      type: str
      description: query
  return_output: retrieved_data
|
||||
|
||||
# Tool entry: artist birth place. callable_api names get_artist_birth_place in tools.py.
get_artist_birth_place:
  description: Get the birth place of an artist.
  callable_api: tools.py:get_artist_birth_place
  args_schema:
    # Single string argument: the artist to look up.
    artist_name:
      type: str
      description: artist name
  return_output: birth_place
|
||||
|
||||
# Tool entry: Billboard lookup. callable_api names get_billboard_rank_date in tools.py.
get_billboard_rank_date:
  description: Get Billboard ranking for a specific rank and date.
  callable_api: tools.py:get_billboard_rank_date
  args_schema:
    # Integer chart position (previously mis-described as "song name").
    rank:
      type: int
      description: rank on the Billboard chart, e.g. 1 for the top entry
    # The tools.py function gives this argument a default of None.
    date:
      type: str
      description: date
  return_output: billboard_info
|
||||
|
||||
# Tool entry: song release date. callable_api names get_song_release_date in tools.py.
get_song_release_date:
  description: Get the release date of a song.
  callable_api: tools.py:get_song_release_date
  args_schema:
    # Single string argument: the song to look up.
    song_name:
      type: str
      description: song name
  return_output: release_date
|
||||
|
||||
# Tool entry: band member list. callable_api names get_members in tools.py.
get_members:
  description: Get the member list of a band.
  callable_api: tools.py:get_members
  args_schema:
    # Single string argument: the band to look up.
    band_name:
      type: str
      description: band name
  return_output: members
|
||||
|
||||
# Tool entry: Grammy Best New Artist. callable_api names get_grammy_best_artist_by_year in tools.py.
get_grammy_best_artist_by_year:
  description: Get the Grammy Best New Artist for a specific year.
  callable_api: tools.py:get_grammy_best_artist_by_year
  args_schema:
    # Integer year; the tools.py function also coerces it with int().
    year:
      type: int
      description: year
  return_output: grammy_best_new_artist
|
||||
52
AgentQnA/tools/tools.py
Normal file
52
AgentQnA/tools/tools.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
import requests
|
||||
from tools.pycragapi import CRAG
|
||||
|
||||
|
||||
def search_knowledge_base(query: str) -> str:
    """Send ``query`` to the worker agent and return the ``text`` field of its JSON reply.

    The worker endpoint is read from the ``WORKER_AGENT_URL`` environment
    variable and printed before the request is sent.
    """
    # The worker agent (DocGrader) performs the actual knowledge-base search.
    worker_url = os.environ.get("WORKER_AGENT_URL")
    print(worker_url)
    # An empty "http" proxy entry is passed to requests; presumably this
    # bypasses any configured HTTP proxy for the call (confirm).
    reply = requests.post(worker_url, json={"query": query}, proxies={"http": ""})
    return reply.json()["text"]
|
||||
|
||||
|
||||
def get_grammy_best_artist_by_year(year: int) -> dict:
    """Return the CRAG lookup of the Grammy Best New Artist for ``year``.

    ``year`` is coerced with ``int()`` before the call, so numeric strings
    are accepted.
    """
    client = CRAG()
    return client.music_grammy_get_best_artist_by_year(int(year))
|
||||
|
||||
|
||||
def get_members(band_name: str) -> dict:
    """Return the CRAG member-list lookup for ``band_name``."""
    client = CRAG()
    members = client.music_get_members(band_name)
    return members
|
||||
|
||||
|
||||
def get_artist_birth_place(artist_name: str) -> dict:
    """Return the CRAG birth-place lookup for ``artist_name``."""
    client = CRAG()
    birth_place = client.music_get_artist_birth_place(artist_name)
    return birth_place
|
||||
|
||||
|
||||
def get_billboard_rank_date(rank: int, date: str = None) -> dict:
    """Return the CRAG Billboard lookup for ``rank`` on ``date``.

    ``rank`` is coerced with ``int()`` before the call; ``date`` is forwarded
    unchanged (``None`` by default).
    """
    client = CRAG()
    return client.music_get_billboard_rank_date(int(rank), date)
|
||||
|
||||
|
||||
def get_song_release_date(song_name: str) -> dict:
    """Return the CRAG release-date lookup for ``song_name``."""
    client = CRAG()
    release_date = client.music_get_song_release_date(song_name)
    return release_date
|
||||
@@ -1,2 +1,5 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Tool entry whose callable_api is the bare identifier "ddg-search" rather than
# a "file.py:function" reference; presumably resolved by the agent framework
# as a built-in tool (confirm).
duckduckgo_search:
  callable_api: ddg-search
|
||||
@@ -1,272 +0,0 @@
|
||||
# AudioQnA
|
||||
|
||||

|
||||
|
||||
In this example we will show you how to build an Audio Question and Answering application (AudioQnA). AudioQnA serves as a talking bot, enabling LLMs to talk with users. It accepts users' audio inputs, converts them to text and feeds the text to LLMs, then gets the text answers and converts them back to audio outputs.
|
||||
|
||||
What AudioQnA is delivering and why it stands out:
|
||||
|
||||
- Fast ASR/TTS inference as microservices on Intel Xeon CPUs with optimization
|
||||
- Multilingual Zero-shot voice cloning cross languages, customizable voice
|
||||
- Fast LLM inference on Intel Gaudi through TGI with RAG and other features support
|
||||
|
||||
There are four folders under the current example.
|
||||
|
||||
`front_end/`: the UI users interact with
|
||||
`serving/`: TGI LLM service endpoint
|
||||
`langchain/`: pipeline the flow of text input -> RAG -> TGI LLM service -> text output
|
||||
`audio/`: pipeline the flow of audio-to-text service -> langchain -> text-to-audio service -> ui
|
||||
|
||||
## Start the Audio services
|
||||
|
||||
### Build ASR and TTS services
|
||||
|
||||
```shell
|
||||
cd audio/docker
|
||||
|
||||
# Build ASR Docker service
|
||||
docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} -f Dockerfile_asr -t intel/gen-ai-examples:audioqna-asr
|
||||
# Build TTS Docker service
|
||||
docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} -f Dockerfile_tts -t intel/gen-ai-examples:audioqna-tts
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
```shell
|
||||
# Start ASR service
|
||||
docker run -d -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8008:8008 intel/gen-ai-examples:audioqna-asr
|
||||
|
||||
# Test ASR
|
||||
wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
|
||||
http_proxy= curl -F 'file=@sample.wav' http://localhost:8008/v1/audio/transcriptions
|
||||
|
||||
# Start TTS service
|
||||
# Predownload local models and mapped in
|
||||
git clone https://huggingface.co/lj1995/GPT-SoVITS pretrained_tts_models
|
||||
docker run -d -v ./pretrained_tts_models:/GPT-SoVITS/GPT_SoVITS/pretrained_models -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9880:9880 intel/gen-ai-examples:audioqna-tts --default_refer_path /GPT-SoVITS/sample.wav --default_refer_text="Who is Pat Gelsinger?" --default_refer_language="en" --bf16 --return_text_stream
|
||||
|
||||
# Upload/Change reference audio
|
||||
# http_proxy= curl --location 'localhost:9880/upload_as_default' \
|
||||
# --form 'default_refer_file=@"sample.wav"' \
|
||||
# --form 'default_refer_text="Who is Pat Gelsinger?"' \
|
||||
# --form 'default_refer_language="en"'
|
||||
|
||||
# Test TTS
|
||||
http_proxy= curl --location 'localhost:9880/v1/audio/speech' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"text": "You can have a look, but you should not touch this item.",
|
||||
"text_language": "en"
|
||||
}' \
|
||||
--output output.wav
|
||||
```
|
||||
|
||||
## Prepare TGI Docker
|
||||
|
||||
Getting started is straightforward with the official Docker container. Simply pull the image using:
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
```
|
||||
|
||||
Alternatively, you can build the Docker image yourself using latest [TGI-Gaudi](https://github.com/huggingface/tgi-gaudi) code with the below command:
|
||||
|
||||
```bash
|
||||
bash ./serving/tgi_gaudi/build_docker.sh
|
||||
```
|
||||
|
||||
## Launch TGI Gaudi Service
|
||||
|
||||
### Launch a local server instance on 1 Gaudi card:
|
||||
|
||||
```bash
|
||||
bash ./serving/tgi_gaudi/launch_tgi_service.sh
|
||||
```
|
||||
|
||||
For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
|
||||
|
||||
Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN=<token>
|
||||
```
|
||||
|
||||
### Launch a local server instance on 8 Gaudi cards:
|
||||
|
||||
```bash
|
||||
bash ./serving/tgi_gaudi/launch_tgi_service.sh 8
|
||||
```
|
||||
|
||||
And then you can make requests like below to check the service status:
|
||||
|
||||
```bash
|
||||
curl 127.0.0.1:8080/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
### Customize TGI Gaudi Service
|
||||
|
||||
The ./serving/tgi_gaudi/launch_tgi_service.sh script accepts three parameters:
|
||||
|
||||
- num_cards: The number of Gaudi cards to be utilized, ranging from 1 to 8. The default is set to 1.
|
||||
- port_number: The port number assigned to the TGI Gaudi endpoint, with the default being 8080.
|
||||
- model_name: The model name utilized for LLM, with the default set to "Intel/neural-chat-7b-v3-3".
|
||||
|
||||
You have the flexibility to customize these parameters according to your specific needs. Additionally, you can set the TGI Gaudi endpoint by exporting the environment variable `TGI_LLM_ENDPOINT`:
|
||||
|
||||
```bash
|
||||
export TGI_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8080"
|
||||
```
|
||||
|
||||
## Enable TEI for embedding model
|
||||
|
||||
Text Embeddings Inference (TEI) is a toolkit designed for deploying and serving open-source text embeddings and sequence classification models efficiently. With TEI, users can extract high-performance features using various popular models. It supports token-based dynamic batching for enhanced performance.
|
||||
|
||||
To launch the TEI service, you can use the following commands:
|
||||
|
||||
```bash
|
||||
model=BAAI/bge-large-en-v1.5
|
||||
revision=refs/pr/5
|
||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
docker run -p 9090:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
|
||||
export TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090"
|
||||
```
|
||||
|
||||
And then you can make requests like below to check the service status:
|
||||
|
||||
```bash
|
||||
curl 127.0.0.1:9090/embed \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Note: If you want to integrate the TEI service into the LangChain application, you'll need to restart the LangChain backend service after launching the TEI service.
|
||||
|
||||
## Launch Redis and LangChain Backend Service
|
||||
|
||||
Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
|
||||
|
||||
```bash
|
||||
cd langchain/docker
|
||||
docker compose -f docker-compose.yml up -d
|
||||
cd ../../
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> If you have modified any files and want that change to be introduced in this step, add `--build` to the end of the command to build the container image instead of pulling it from dockerhub.
|
||||
|
||||
## Ingest data into Redis (Optional)
|
||||
|
||||
Each time the Redis container is launched, data should be ingested into the container using the commands:
|
||||
|
||||
```bash
|
||||
docker exec -it qna-rag-redis-server bash
|
||||
cd /ws
|
||||
python ingest.py
|
||||
exit
|
||||
```
|
||||
|
||||
Note: `ingest.py` will download the embedding model. Please set the proxy if necessary.
|
||||
|
||||
# Start LangChain Server
|
||||
|
||||
## Enable GuardRails using Meta's Llama Guard model (Optional)
|
||||
|
||||
We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/LlamaGuard-7b) model. To activate GuardRails, kindly follow the instructions below to deploy the Llama Guard model on TGI Gaudi.
|
||||
|
||||
```bash
|
||||
volume=$PWD/data
|
||||
model_id="meta-llama/LlamaGuard-7b"
|
||||
docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy tgi_gaudi --model-id $model_id
|
||||
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
|
||||
```
|
||||
|
||||
And then you can make requests like below to check the service status:
|
||||
|
||||
```bash
|
||||
curl 127.0.0.1:8088/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Start the Backend Service
|
||||
|
||||
Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service:
|
||||
|
||||
```bash
|
||||
docker exec -it qna-rag-redis-server bash
|
||||
nohup python app/server.py &
|
||||
```
|
||||
|
||||
The LangChain backend service listens on port 8000; you can customize it by changing the code in `docker/qna-app/app/server.py`.
|
||||
|
||||
And then you can make requests like below to check the LangChain backend service status:
|
||||
|
||||
```bash
|
||||
# non-streaming endpoint
|
||||
curl 127.0.0.1:8000/v1/rag/chat \
|
||||
-X POST \
|
||||
-d '{"query":"What is the total revenue of Nike in 2023?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
```bash
|
||||
# streaming endpoint
|
||||
curl 127.0.0.1:8000/v1/rag/chat_stream \
|
||||
-X POST \
|
||||
-d '{"query":"What is the total revenue of Nike in 2023?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Start the Frontend Service
|
||||
|
||||
Please refer to frontend [README](./front_end/README.md).
|
||||
|
||||
## Enable TGI Gaudi FP8 for higher throughput (Optional)
|
||||
|
||||
The TGI Gaudi utilizes BFLOAT16 optimization as the default setting. If you aim to achieve higher throughput, you can enable FP8 quantization on the TGI Gaudi. Note that currently only Llama2 series and Mistral series models support FP8 quantization. Please follow the below steps to enable FP8 quantization.
|
||||
|
||||
### Prepare Metadata for FP8 Quantization
|
||||
|
||||
Enter into the TGI Gaudi docker container, and then run the below commands:
|
||||
|
||||
```bash
|
||||
pip install git+https://github.com/huggingface/optimum-habana.git
|
||||
git clone https://github.com/huggingface/optimum-habana.git
|
||||
cd optimum-habana/examples/text-generation
|
||||
pip install -r requirements_lm_eval.txt
|
||||
QUANT_CONFIG=./quantization_config/maxabs_measure.json python ../gaudi_spawn.py run_lm_eval.py -o acc_7b_bs1_measure.txt --model_name_or_path Intel/neural-chat-7b-v3-3 --attn_softmax_bf16 --use_hpu_graphs --trim_logits --use_kv_cache --reuse_cache --bf16 --batch_size 1
|
||||
QUANT_CONFIG=./quantization_config/maxabs_quant.json python ../gaudi_spawn.py run_lm_eval.py -o acc_7b_bs1_quant.txt --model_name_or_path Intel/neural-chat-7b-v3-3 --attn_softmax_bf16 --use_hpu_graphs --trim_logits --use_kv_cache --reuse_cache --bf16 --batch_size 1 --fp8
|
||||
```
|
||||
|
||||
After finishing the above commands, the quantization metadata will be generated. Move the metadata directory ./hqt_output/ and copy the quantization JSON file to the host (under …/data). Please adapt the commands with your Docker ID and directory path.
|
||||
|
||||
```bash
|
||||
docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/hqt_output data/
|
||||
docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/quantization_config/maxabs_quant.json data/
|
||||
```
|
||||
|
||||
Then modify the `dump_stats_path` to "/data/hqt_output/measure" and update `dump_stats_xlsx_path` to "/data/hqt_output/measure/fp8stats.xlsx" in the maxabs_quant.json file.
|
||||
|
||||
### Restart the TGI Gaudi server with all the metadata mapped
|
||||
|
||||
```bash
|
||||
docker run -p 8080:80 -e QUANT_CONFIG=/data/maxabs_quant.json -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id Intel/neural-chat-7b-v3-3
|
||||
```
|
||||
|
||||
Now the TGI Gaudi will launch the FP8 model by default and you can make requests like below to check the service status:
|
||||
|
||||
```bash
|
||||
curl 127.0.0.1:8080/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
#
|
||||
|
||||
SCRIPT USAGE NOTICE: By downloading and using any script file included with the associated software package (such as files with .bat, .cmd, or .JS extensions, Docker files, or any other type of file that, when executed, automatically downloads and/or installs files onto your system) (the “Script File”), it is your obligation to review the Script File to understand what files (e.g., other software, AI models, AI Datasets) the Script File will download to your system (“Downloaded Files”). Furthermore, by downloading and using the Downloaded Files, even if they are installed through a silent install, you agree to any and all terms and conditions associated with such files, including but not limited to, license terms, notices, or disclaimers.
|
||||
@@ -1,15 +0,0 @@
|
||||
# Image for the ASR server: Python 3.11 plus ffmpeg and the packages
# listed in asr/requirements.txt.
FROM python:3.11-slim

# key=value form; the space-separated `ENV LANG C.UTF-8` form is legacy syntax.
ENV LANG=C.UTF-8

# Install system dependencies and drop the apt package lists in the same
# layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y ffmpeg \
    && rm -rf /var/lib/apt/lists/*

COPY ./asr /asr
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /asr/requirements.txt

WORKDIR /asr

ENTRYPOINT ["python", "asr_server.py"]
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# Image for the TTS server: Python 3.9, a GPT-SoVITS checkout from its main
# branch, and the local tts_server.py / config.py copied on top of it.
FROM python:3.9-slim

# key=value form; the space-separated `ENV LANG C.UTF-8` form is legacy syntax.
ENV LANG=C.UTF-8
ENV PYTHONPATH=/home/user:/GPT-SoVITS/GPT_SoVITS

# Install system dependencies in a single apt transaction and drop the apt
# package lists in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y ffmpeg build-essential wget numactl git libomp-dev google-perftools \
    && rm -rf /var/lib/apt/lists/*

# Preload OpenMP and tcmalloc; thread count is fixed at 56.
# NOTE(review): 56 is hard-coded -- confirm it matches the deployment host.
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libiomp5.so:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
ENV OMP_NUM_THREADS=56

RUN git clone https://github.com/RVC-Boss/GPT-SoVITS.git /GPT-SoVITS -b main

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /GPT-SoVITS/requirements.txt

COPY ./tts/tts_server.py /GPT-SoVITS/
COPY ./tts/config.py /GPT-SoVITS/

# Download the sample ref wav
RUN wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav -P /GPT-SoVITS
RUN wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/welcome_cn.wav -P /GPT-SoVITS

#RUN useradd -m -s /bin/bash user && \
#    mkdir -p /home/user && \
#    chown -R user /home/user/

#USER user

WORKDIR /GPT-SoVITS

ENTRYPOINT ["python", "tts_server.py"]
|
||||
@@ -1,124 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import contextlib
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from datasets import Audio, Dataset
|
||||
from pydub import AudioSegment
|
||||
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
||||
|
||||
|
||||
class AudioSpeechRecognition:
    """Convert audio to text with a Whisper model.

    The model and processor are loaded once in ``__init__``;
    ``audio2text`` then transcribes one audio file per call using the
    language stored in ``self.language``.
    """

    def __init__(self, model_name_or_path="openai/whisper-small", bf16=False, language="english", device="cpu"):
        """Load the Whisper model and processor and move the model to ``device``.

        Args:
            model_name_or_path: model id or path; overridden by the
                ``ASR_MODEL_PATH`` environment variable when that is set.
            bf16: when true, the model is passed through ``ipex.optimize``
                with ``torch.bfloat16`` and ``audio2text`` runs generation
                under CPU autocast.
            language: language passed to ``generate`` in ``audio2text``.
            device: target device string; ``"hpu"`` additionally enables the
                Gaudi adaptation and a warmup run.
        """
        if device == "hpu":
            # Explicitly link HPU with Torch
            from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

            adapt_transformers_to_gaudi()

        self.device = device
        # The environment variable takes precedence over the constructor argument.
        asr_model_name_or_path = os.environ.get("ASR_MODEL_PATH", model_name_or_path)
        print("Downloading model: {}".format(asr_model_name_or_path))
        self.model = WhisperForConditionalGeneration.from_pretrained(asr_model_name_or_path).to(self.device)
        self.processor = WhisperProcessor.from_pretrained(asr_model_name_or_path)
        self.model.eval()
        self.bf16 = bf16
        if self.bf16:
            # Imported lazily so the dependency is only needed when bf16 is requested.
            import intel_extension_for_pytorch as ipex

            self.model = ipex.optimize(self.model, dtype=torch.bfloat16)
        self.language = language

        if device == "hpu":
            # do hpu graph warmup with a long enough input audio
            # whisper has a receptive field of 30 seconds
            # here we select a relatively long audio (~15 sec) to quickly warmup
            self._warmup_whisper_hpu_graph("https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav")

    def _audiosegment_to_librosawav(self, audiosegment):
        """Return the first channel of ``audiosegment`` as a flat float32 array.

        Samples are divided by the maximum value of their integer sample
        type, as reported by ``np.iinfo``.
        """
        # https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples
        # This way is faster than librosa.load or HuggingFace Dataset wrapper
        channel_sounds = audiosegment.split_to_mono()[:1]  # only select the first channel
        samples = [s.get_array_of_samples() for s in channel_sounds]

        fp_arr = np.array(samples).T.astype(np.float32)
        fp_arr /= np.iinfo(samples[0].typecode).max
        fp_arr = fp_arr.reshape(-1)

        return fp_arr

    def _warmup_whisper_hpu_graph(self, url):
        """Download the audio at ``url`` and run one ``generate`` pass on it.

        The file is saved as ``warmup.wav`` in the current working directory.
        NOTE(review): ``warmup.wav`` is never removed afterwards, and the
        warmup always uses ``language="chinese"`` regardless of
        ``self.language`` -- confirm both are intended.
        """
        print("[ASR] fetch warmup audio...")
        urllib.request.urlretrieve(
            url,
            "warmup.wav",
        )
        print("[ASR] warmup...")
        waveform = AudioSegment.from_file("warmup.wav").set_frame_rate(16000)
        waveform = self._audiosegment_to_librosawav(waveform)
        # pylint: disable=E1101
        inputs = self.processor.feature_extractor(
            waveform, return_tensors="pt", sampling_rate=16_000
        ).input_features.to(self.device)
        _ = self.model.generate(inputs, language="chinese")

    def audio2text(self, audio_path):
        """Convert audio to text.

        Args:
            audio_path: the path to the input audio, e.g. ~/xxx.mp3

        Returns:
            The first decoded transcription string. When ``self.language``
            is ``"chinese"`` or ``"mandarin"`` the text is passed through
            ``zhconv.convert(..., "zh-cn")`` first.
        """
        start = time.time()

        try:
            waveform = AudioSegment.from_file(audio_path).set_frame_rate(16000)
            waveform = self._audiosegment_to_librosawav(waveform)
        except Exception as e:
            # Fall back to the HuggingFace datasets audio loader when the pydub path fails.
            print(f"[ASR] audiosegment to librosa wave fail: {e}")
            audio_dataset = Dataset.from_dict({"audio": [audio_path]}).cast_column("audio", Audio(sampling_rate=16000))
            waveform = audio_dataset[0]["audio"]["array"]

        # pylint: disable=E1101
        inputs = self.processor.feature_extractor(
            waveform, return_tensors="pt", sampling_rate=16_000
        ).input_features.to(self.device)
        # Autocast is only entered when bf16 was requested; otherwise a no-op context is used.
        with torch.cpu.amp.autocast() if self.bf16 else contextlib.nullcontext():
            predicted_ids = self.model.generate(inputs, language=self.language)
        # pylint: disable=E1101
        result = self.processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=True)[0]
        if self.language in ["chinese", "mandarin"]:
            from zhconv import convert

            result = convert(result, "zh-cn")
        print(f"generated text in {time.time() - start} seconds, and the result is: {result}")
        return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: download two sample clips in turn and transcribe each.
    asr = AudioSpeechRecognition(language="english")

    # Test multilanguage asr
    urllib.request.urlretrieve(
        "https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav",
        "sample.wav",
    )
    asr.language = "chinese"
    text = asr.audio2text("sample.wav")

    # NOTE(review): asr.language is still "chinese" here; if this second clip
    # is meant to exercise English it should be reset first -- confirm.
    urllib.request.urlretrieve(
        "https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav",
        "sample.wav",
    )
    text = asr.audio2text("sample.wav")

    # Both downloads reuse the same file name, so one removal cleans up.
    os.remove("sample.wav")
|
||||
@@ -1,69 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import uvicorn
|
||||
from asr import AudioSpeechRecognition
|
||||
from fastapi import FastAPI, File, UploadFile
|
||||
from fastapi.responses import Response
|
||||
from pydub import AudioSegment
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
|
||||
# FastAPI application object that the route decorators below attach to.
app = FastAPI()
# Placeholder for the recognizer; assigned in the __main__ block before the
# server starts.
asr = None

# CORS is opened to every origin, method and header.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is acceptable for the deployment.
app.add_middleware(
    CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
)
|
||||
|
||||
|
||||
@app.get("/v1/health")
async def health() -> Response:
    """Liveness probe: always answers with an empty HTTP 200 response."""
    ok_response = Response(status_code=200)
    return ok_response
|
||||
|
||||
|
||||
@app.post("/v1/audio/transcriptions")
async def audio_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    The upload is written to a private temporary directory, decoded with
    pydub, resampled to 16 kHz, exported as WAV and passed to the
    module-level ``asr`` recognizer.

    Args:
        file: the uploaded audio file (multipart form field ``file``).

    Returns:
        ``{"asr_result": <text>}``. If recognition raises, the value is the
        exception message as a string, so the payload stays JSON-serializable.

    Decoding errors raised before recognition propagate to the caller, as in
    the original implementation.
    """
    import tempfile  # local import: only this handler needs it

    file_name = file.filename
    print(f"Received file: {file_name}")
    content = await file.read()

    # A per-request temporary directory avoids two problems with fixed names
    # in the working directory: concurrent requests overwriting each other's
    # files, and a client-supplied filename choosing where we write. It also
    # guarantees cleanup even when decoding fails.
    with tempfile.TemporaryDirectory() as workdir:
        raw_path = os.path.join(workdir, "upload")
        wav_path = os.path.join(workdir, "upload.wav")

        with open(raw_path, "wb") as fout:
            fout.write(content)
        audio = AudioSegment.from_file(raw_path)
        audio = audio.set_frame_rate(16000)
        # bytes to wav
        audio.export(wav_path, format="wav")

        try:
            asr_result = asr.audio2text(wav_path)
        except Exception as e:
            print(e)
            # Return the message, not the exception object, which cannot be
            # encoded into the JSON response.
            asr_result = str(e)

    return {"asr_result": asr_result}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line options, declared as (flag, add_argument keyword arguments).
    parser = argparse.ArgumentParser()
    for flag, options in (
        ("--host", {"type": str, "default": "0.0.0.0"}),
        ("--port", {"type": int, "default": 8008}),
        ("--model_name_or_path", {"type": str, "default": "openai/whisper-tiny"}),
        ("--bf16", {"default": False, "action": "store_true"}),
        ("--language", {"type": str, "default": "english"}),
        ("--device", {"type": str, "default": "cpu"}),
    ):
        parser.add_argument(flag, **options)

    args = parser.parse_args()

    # Build the recognizer that the transcription route reads from module scope.
    asr = AudioSpeechRecognition(
        model_name_or_path=args.model_name_or_path,
        bf16=args.bf16,
        language=args.language,
        device=args.device,
    )

    uvicorn.run(app, host=args.host, port=args.port)
|
||||
@@ -1,11 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
datasets
|
||||
fastapi
|
||||
ffmpeg-python
|
||||
numpy
|
||||
pydub
|
||||
python-multipart
|
||||
torch==2.2.0
|
||||
transformers
|
||||
uvicorn
|
||||
zhconv
|
||||
@@ -1,101 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
#
|
||||
# This script is adapted from
|
||||
# https://github.com/RVC-Boss/GPT-SoVITS/blob/main/api.py
|
||||
# which is under the MIT license
|
||||
#
|
||||
# Copyright (c) 2024 RVC-Boss
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import torch
|
||||
|
||||
# Model checkpoint paths; empty means "not specified".
sovits_path = ""
gpt_path = ""

# Boolean switches read from the environment (case-insensitive "true").
is_half = os.environ.get("is_half", "True").lower() == "true"
is_share = os.environ.get("is_share", "False").lower() == "true"

# bf16 is off by default. It is defined unconditionally so that Config() can
# always read it, whichever device branch below is taken.
use_bf16 = False

cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
pretrained_sovits_path = "GPT_SoVITS/pretrained_models/s2G488k.pth"
pretrained_gpt_path = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"

exp_root = "logs"
python_exec = sys.executable or "python"
infer_device = "cuda" if torch.cuda.is_available() else "cpu"

webui_port_main = 9874
webui_port_uvr5 = 9873
webui_port_infer_tts = 9872
webui_port_subfix = 9871

api_port = 9880

if infer_device == "cuda":
    gpu_name = torch.cuda.get_device_name(0)
    # Half precision is turned off for GPUs whose name matches any of these
    # patterns.
    if (
        ("16" in gpu_name and "V100" not in gpu_name.upper())
        or "P40" in gpu_name.upper()
        or "P10" in gpu_name.upper()
        or "1060" in gpu_name
        or "1070" in gpu_name
        or "1080" in gpu_name
    ):
        is_half = False

if infer_device == "cpu":
    # fp16 is never used on CPU.
    is_half = False
|
||||
|
||||
|
||||
class Config:
    """Copies this module's settings onto an instance as attributes."""

    def __init__(self):
        # Model checkpoints and bundled pretrained assets.
        self.sovits_path = sovits_path
        self.gpt_path = gpt_path
        self.cnhubert_path = cnhubert_path
        self.bert_path = bert_path
        self.pretrained_sovits_path = pretrained_sovits_path
        self.pretrained_gpt_path = pretrained_gpt_path

        # Numeric precision and execution environment.
        self.is_half = is_half
        self.use_bf16 = use_bf16
        self.infer_device = infer_device
        self.python_exec = python_exec
        self.exp_root = exp_root

        # Network ports.
        self.webui_port_main = webui_port_main
        self.webui_port_uvr5 = webui_port_uvr5
        self.webui_port_infer_tts = webui_port_infer_tts
        self.webui_port_subfix = webui_port_subfix
        self.api_port = api_port
|
||||
@@ -1,28 +0,0 @@
|
||||
chardet
|
||||
# funasr==1.0.0
|
||||
cn2an
|
||||
# gradio==3.38.0
|
||||
# gradio_client==0.8.1
|
||||
ffmpeg-python
|
||||
g2p_en
|
||||
jieba
|
||||
jieba_fast
|
||||
LangSegment>=0.2.0
|
||||
# tensorboard
|
||||
librosa==0.9.2
|
||||
numba==0.56.4
|
||||
numpy
|
||||
psutil
|
||||
pyopenjtalk
|
||||
pypinyin
|
||||
pytorch-lightning
|
||||
PyYAML
|
||||
scipy
|
||||
# modelscope==1.10.0
|
||||
sentencepiece
|
||||
torchaudio
|
||||
# onnxruntime
|
||||
tqdm
|
||||
transformers
|
||||
# Faster_Whisper
|
||||
wordsegment
|
||||
@@ -1,741 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
#
|
||||
# This script is adapted from
|
||||
# https://github.com/RVC-Boss/GPT-SoVITS/blob/main/api.py
|
||||
# which is under the MIT license
|
||||
#
|
||||
# Copyright (c) 2024 RVC-Boss
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from io import BytesIO
|
||||
from time import time as ttime
|
||||
|
||||
import config as global_config
|
||||
import LangSegment
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import uvicorn
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from fastapi import FastAPI, File, Form, HTTPException, Request, UploadFile
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from feature_extractor import cnhubert
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from module.models import SynthesizerTrn
|
||||
from my_utils import load_audio
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
from text import cleaned_text_to_sequence
|
||||
from text.cleaner import clean_text
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
|
||||
|
||||
class DefaultRefer:
    """Default reference audio used when a request does not supply its own.

    Attributes are plain values and may be reassigned at runtime.
    """

    def __init__(self, path, text, language):
        """Store the reference audio path, its transcript and its language.

        The constructor arguments are used directly rather than the global
        ``args``, so the class no longer depends on argument parsing having
        run first.
        """
        self.path = path
        self.text = text
        self.language = language

    def is_ready(self) -> bool:
        """Return True when path, text and language are all non-empty."""
        return is_full(self.path, self.text, self.language)
|
||||
|
||||
|
||||
def is_empty(*items):
    """Return True when every argument is None or the empty string.

    Called with no arguments, the result is True.
    """
    return all(item is None or item == "" for item in items)
|
||||
|
||||
|
||||
def is_full(*items):
    """Return True when no argument is None or the empty string.

    Called with no arguments, the result is True.
    """
    return not any(item is None or item == "" for item in items)
|
||||
|
||||
|
||||
def change_sovits_weights(sovits_path):
    """Load a SoVITS checkpoint and rebind the globals ``hps`` and ``vq_model``.

    Args:
        sovits_path: Path of a checkpoint containing "config" and "weight".
    """
    global vq_model, hps
    checkpoint = torch.load(sovits_path, map_location="cpu")
    hps = DictToAttrRecursive(checkpoint["config"])
    hps.model.semantic_frame_rate = "25hz"

    spec_channels = hps.data.filter_length // 2 + 1
    segment_frames = hps.train.segment_size // hps.data.hop_length
    vq_model = SynthesizerTrn(
        spec_channels,
        segment_frames,
        n_speakers=hps.data.n_speakers,
        **hps.model,
    )

    # ``enc_q`` is dropped unless the path contains "pretrained".
    if "pretrained" not in sovits_path:
        del vq_model.enc_q

    vq_model = (vq_model.half() if is_half else vq_model).to(device)
    vq_model.eval()
    vq_model.load_state_dict(checkpoint["weight"], strict=False)
|
||||
|
||||
|
||||
def change_gpt_weights(gpt_path):
    """Load a GPT checkpoint and rebind ``hz``, ``max_sec``, ``t2s_model`` and ``config``.

    Args:
        gpt_path: Path of a checkpoint containing "config" and "weight".
    """
    global hz, max_sec, t2s_model, config
    hz = 50
    checkpoint = torch.load(gpt_path, map_location="cpu")
    config = checkpoint["config"]
    max_sec = config["data"]["max_sec"]

    t2s_model = Text2SemanticLightningModule(config, "****", is_train=False)
    t2s_model.load_state_dict(checkpoint["weight"])
    if is_half:
        t2s_model = t2s_model.half()
    t2s_model = t2s_model.to(device)
    t2s_model.eval()

    # Log the parameter count in millions.
    param_count = sum(p.nelement() for p in t2s_model.parameters())
    logger.info("Number of parameter: %.2fM" % (param_count / 1e6))
|
||||
|
||||
|
||||
def get_bert_feature(text, word2ph):
    """Return BERT features repeated per phone, transposed.

    Args:
        text: Text fed to the module-level tokenizer; its length must equal
            ``len(word2ph)``.
        word2ph: For each position of ``text``, how many times its feature
            row is repeated.

    Returns:
        The concatenated repeated rows, transposed.
    """
    with torch.no_grad():
        encoded = tokenizer(text, return_tensors="pt")
        for field in encoded:
            encoded[field] = encoded[field].to(device)
        outputs = bert_model(**encoded, output_hidden_states=True)
        # Third-from-last hidden state, first batch item, without the first
        # and last positions.
        hidden = torch.cat(outputs["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]

    assert len(word2ph) == len(text)
    expanded = [hidden[idx].repeat(word2ph[idx], 1) for idx in range(len(word2ph))]
    return torch.cat(expanded, dim=0).T
|
||||
|
||||
|
||||
def clean_text_inf(text, language):
    """Clean ``text`` and convert the resulting phones with ``cleaned_text_to_sequence``.

    Returns:
        ``(phones, word2ph, norm_text)`` where ``phones`` is the converted
        sequence.
    """
    raw_phones, word2ph, norm_text = clean_text(text, language)
    return cleaned_text_to_sequence(raw_phones), word2ph, norm_text
|
||||
|
||||
|
||||
def get_bert_inf(phones, word2ph, norm_text, language):
    """Return BERT features for "zh" text, or a zero tensor for other languages.

    An "all_" prefix on ``language`` is ignored. The zero tensor has shape
    ``(1024, len(phones))`` and is fp16 when ``is_half`` is set, else fp32.
    """
    if language.replace("all_", "") == "zh":
        return get_bert_feature(norm_text, word2ph).to(device)

    zeros_dtype = torch.float16 if is_half else torch.float32
    return torch.zeros((1024, len(phones)), dtype=zeros_dtype).to(device)
|
||||
|
||||
|
||||
def get_phones_and_bert(text, language):
    """Convert text to a phone sequence plus matching BERT features.

    Args:
        text: Text to convert.
        language: One of "en", "all_zh", "all_ja" (single language) or
            "zh", "ja", "auto" (text is split into language segments with
            LangSegment and each segment is processed separately).

    Returns:
        ``(phones, bert, norm_text)``; ``bert`` is cast to fp16 when
        ``is_half`` is set, otherwise fp32.
    """
    if language in {"en", "all_zh", "all_ja"}:
        language = language.replace("all_", "")
        if language == "en":
            LangSegment.setfilters(["en"])
            formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text))
        else:
            formattext = text

        # Collapse runs of spaces into one. The needle must be TWO spaces:
        # with a single-space needle the loop never terminates once the text
        # contains a space. Built with `* 2` so the literal cannot be
        # silently collapsed again by whitespace normalisation.
        double_space = " " * 2
        while double_space in formattext:
            formattext = formattext.replace(double_space, " ")

        phones, word2ph, norm_text = clean_text_inf(formattext, language)
        # Same zh-features / zero-tensor rule as the per-segment path below.
        bert = get_bert_inf(phones, word2ph, norm_text, language)
    elif language in {"zh", "ja", "auto"}:
        textlist = []
        langlist = []
        LangSegment.setfilters(["zh", "ja", "en", "ko"])
        for tmp in LangSegment.getTexts(text):
            if language == "auto":
                # Segments detected as "ko" are processed as "zh".
                langlist.append("zh" if tmp["lang"] == "ko" else tmp["lang"])
            else:
                # English segments keep their tag; everything else uses the
                # language requested by the caller.
                langlist.append(tmp["lang"] if tmp["lang"] == "en" else language)
            textlist.append(tmp["text"])

        phones_list = []
        bert_list = []
        norm_text_list = []
        for segment, lang in zip(textlist, langlist):
            phones, word2ph, norm_text = clean_text_inf(segment, lang)
            bert_list.append(get_bert_inf(phones, word2ph, norm_text, lang))
            phones_list.append(phones)
            norm_text_list.append(norm_text)

        bert = torch.cat(bert_list, dim=1)
        phones = sum(phones_list, [])
        norm_text = "".join(norm_text_list)

    return phones, bert.to(torch.float16 if is_half else torch.float32), norm_text
|
||||
|
||||
|
||||
class DictToAttrRecursive(dict):
    """A dict whose keys are also readable and writable as attributes.

    Nested plain dicts are wrapped recursively. Being a real mapping, an
    instance can be unpacked with ``**`` (as ``change_sovits_weights`` does
    with ``hps.model``), which a plain attribute holder cannot.
    """

    def __init__(self, input_dict):
        super().__init__()
        for key, value in input_dict.items():
            # Routed through __setattr__ so the mapping and the attributes
            # stay in sync and nested dicts get wrapped.
            setattr(self, key, value)

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, e.g. for keys
        # added through item assignment.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    def __setattr__(self, key, value):
        if isinstance(value, dict) and not isinstance(value, DictToAttrRecursive):
            value = DictToAttrRecursive(value)
        super().__setitem__(key, value)
        super().__setattr__(key, value)
|
||||
|
||||
|
||||
def get_spepc(hps, filename):
    """Load ``filename`` and return its spectrogram from ``spectrogram_torch``.

    The audio is loaded at ``hps.data.sampling_rate`` and given a leading
    dimension before the spectrogram is computed with the filter, hop and
    window lengths from ``hps.data``.
    """
    data_cfg = hps.data
    waveform = torch.FloatTensor(load_audio(filename, int(data_cfg.sampling_rate)))
    batched = waveform.unsqueeze(0)
    return spectrogram_torch(
        batched,
        data_cfg.filter_length,
        data_cfg.sampling_rate,
        data_cfg.hop_length,
        data_cfg.win_length,
        center=False,
    )
|
||||
|
||||
|
||||
def pack_audio(audio_bytes, data, rate):
    """Append ``data`` to ``audio_bytes`` in the format named by the global ``media_type``.

    "ogg" and "aac" use their dedicated packers; any other value writes the
    raw sample bytes.
    """
    if media_type == "ogg":
        return pack_ogg(audio_bytes, data, rate)
    if media_type == "aac":
        return pack_aac(audio_bytes, data, rate)
    return pack_raw(audio_bytes, data, rate)
|
||||
|
||||
|
||||
def pack_ogg(audio_bytes, data, rate):
    """Write ``data`` into ``audio_bytes`` as single-channel OGG at ``rate`` and return the buffer."""
    ogg_writer = sf.SoundFile(audio_bytes, mode="w", samplerate=rate, channels=1, format="ogg")
    with ogg_writer:
        ogg_writer.write(data)
    return audio_bytes
|
||||
|
||||
|
||||
def pack_raw(audio_bytes, data, rate):
    """Append the raw bytes of ``data`` to ``audio_bytes`` and return the buffer.

    ``rate`` is accepted for signature parity with the other packers and is
    not used.
    """
    payload = data.tobytes()
    audio_bytes.write(payload)
    return audio_bytes
|
||||
|
||||
|
||||
def pack_wav(audio_bytes, rate):
    """Reinterpret the buffer contents as int16 samples and return them as a new WAV buffer.

    Args:
        audio_bytes: Buffer holding raw int16 sample bytes.
        rate: Sample rate written into the WAV container.

    Returns:
        A new ``BytesIO`` containing the WAV data.
    """
    samples = np.frombuffer(audio_bytes.getvalue(), dtype=np.int16)
    container = BytesIO()
    sf.write(container, samples, rate, format="wav")
    return container
|
||||
|
||||
|
||||
def pack_aac(audio_bytes, data, rate):
    """Encode ``data`` to AAC (ADTS) with an ``ffmpeg`` subprocess and append it to ``audio_bytes``.

    The sample bytes are piped to ffmpeg as mono signed 16-bit little-endian
    PCM at ``rate``; ffmpeg's stdout is written to the buffer. stderr is
    captured and discarded.
    """
    ffmpeg_cmd = ["ffmpeg"]
    # Input: raw PCM read from stdin.
    ffmpeg_cmd += ["-f", "s16le", "-ar", str(rate), "-ac", "1", "-i", "pipe:0"]
    # Output: 192k AAC in an ADTS stream written to stdout.
    ffmpeg_cmd += ["-c:a", "aac", "-b:a", "192k", "-vn", "-f", "adts", "pipe:1"]

    encoder = subprocess.Popen(
        ffmpeg_cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    encoded, _ = encoder.communicate(input=data.tobytes())
    audio_bytes.write(encoded)
    return audio_bytes
|
||||
|
||||
|
||||
def read_clean_buffer(audio_bytes):
    """Return the buffer's current contents and leave the buffer empty and rewound.

    Returns:
        ``(audio_bytes, chunk)`` where ``chunk`` is everything that was in
        the buffer.
    """
    chunk = audio_bytes.getvalue()
    # Rewind first, then cut at the current (zero) position.
    audio_bytes.seek(0)
    audio_bytes.truncate()
    return audio_bytes, chunk
|
||||
|
||||
|
||||
def cut_text(text, punc):
    """Insert a line break after every delimiter character in ``text``.

    Args:
        text: Text to split.
        punc: Accepted for interface compatibility; this implementation
            always splits on its built-in delimiter set and does not consult
            ``punc``.

    Returns:
        ``text`` with "\\n" after each delimiter and consecutive line breaks
        collapsed into one.
    """
    # NOTE(review): the duplicated ASCII entries in the previous list suggest
    # some delimiters were originally full-width variants -- confirm against
    # the upstream project before adding them here.
    delimiters = ",.;?!、。:…"

    # Escape the delimiters that go into the character class, not the input
    # text: escaping the text injected backslashes into what gets
    # synthesized.
    pattern = "([" + "".join(re.escape(ch) for ch in delimiters) + "])"

    pieces = re.split(pattern, text.strip("\n"))
    # re.split with a capture group alternates [chunk, delimiter, chunk, ...];
    # glue each chunk to the delimiter that follows it.
    merged = ["".join(pair) for pair in zip(pieces[::2], pieces[1::2])]
    if len(pieces) % 2 == 1:
        merged.append(pieces[-1])
    text = "\n".join(merged)

    while "\n\n" in text:
        text = text.replace("\n\n", "\n")

    return text
|
||||
|
||||
|
||||
def only_punc(text):
    """Return True when ``text`` contains no alphanumeric character (True for an empty string)."""
    # str.isalpha() implies str.isalnum(), so one check covers both.
    return all(not ch.isalnum() for ch in text)
|
||||
|
||||
|
||||
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language):
    """Synthesize ``text`` line by line and yield the encoded audio.

    Args:
        ref_wav_path: Path of the reference audio.
        prompt_text: Transcript of the reference audio.
        prompt_language: Language key of the prompt (looked up in ``dict_language``).
        text: Text to synthesize; each "\\n"-separated line is generated
            separately and lines without alphanumeric characters are skipped.
        text_language: Language key of ``text`` (looked up in ``dict_language``).

    Yields:
        In "normal" stream mode, one audio chunk per synthesized line.
        Otherwise a single payload after all lines are done, wrapped as WAV
        when the media type is "wav".
    """
    t0 = ttime()
    prompt_text = prompt_text.strip("\n")
    text = text.strip("\n")
    # 0.3 s of silence: appended to the reference audio and after each line.
    zero_wav = np.zeros(int(hps.data.sampling_rate * 0.3), dtype=np.float16 if is_half else np.float32)

    with torch.no_grad():
        wav16k, _ = librosa.load(ref_wav_path, sr=16000)
        wav16k = torch.from_numpy(wav16k)
        zero_wav_torch = torch.from_numpy(zero_wav)
        if is_half:
            wav16k = wav16k.half().to(device)
            zero_wav_torch = zero_wav_torch.half().to(device)
        else:
            wav16k = wav16k.to(device)
            zero_wav_torch = zero_wav_torch.to(device)
        wav16k = torch.cat([wav16k, zero_wav_torch])
        ssl_content = ssl_model.model(wav16k.unsqueeze(0))["last_hidden_state"].transpose(1, 2)  # .float()
        codes = vq_model.extract_latent(ssl_content)
        prompt_semantic = codes[0, 0]
    t1 = ttime()

    prompt_language = dict_language[prompt_language.lower()]
    text_language = dict_language[text_language.lower()]
    phones1, bert1, _ = get_phones_and_bert(prompt_text, prompt_language)
    texts = text.split("\n")
    audio_bytes = BytesIO()

    # The reference spectrogram depends only on hps and ref_wav_path, so it is
    # computed once, on first use, instead of reloading the audio per line.
    refer = None

    for text in texts:
        if only_punc(text):
            continue

        audio_opt = []
        phones2, bert2, _ = get_phones_and_bert(text, text_language)
        bert = torch.cat([bert1, bert2], 1)

        all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
        bert = bert.to(device).unsqueeze(0)
        all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device)
        prompt = prompt_semantic.unsqueeze(0).to(device)
        t2 = ttime()
        with torch.no_grad():
            # bf16 autocast on CPU when enabled, otherwise a no-op context.
            with (
                torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16, cache_enabled=True)
                if use_bf16
                else contextlib.nullcontext()
            ):
                pred_semantic, idx = t2s_model.model.infer_panel(
                    all_phoneme_ids,
                    all_phoneme_len,
                    prompt,
                    bert,
                    # prompt_phone_len=ph_offset,
                    top_k=config["inference"]["top_k"],
                    early_stop_num=hz * max_sec,
                )
        t3 = ttime()
        # Keep only the last ``idx`` predicted tokens.
        pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)

        if refer is None:
            refer = get_spepc(hps, ref_wav_path)
            refer = refer.half().to(device) if is_half else refer.to(device)

        audio = (
            vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refer)
            .detach()
            .cpu()
            .numpy()[0, 0]
        )
        audio_opt.append(audio)
        audio_opt.append(zero_wav)
        t4 = ttime()
        # Scale to int16 before packing.
        audio_bytes = pack_audio(
            audio_bytes, (np.concatenate(audio_opt, 0) * 32768).astype(np.int16), hps.data.sampling_rate
        )
        logger.info("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t3 - t2, t4 - t3))
        if stream_mode == "normal":
            audio_bytes, audio_chunk = read_clean_buffer(audio_bytes)
            yield audio_chunk

    if not stream_mode == "normal":
        if media_type == "wav":
            audio_bytes = pack_wav(audio_bytes, hps.data.sampling_rate)
        yield audio_bytes.getvalue()
|
||||
|
||||
|
||||
def handle_control(command):
    """Execute a process control command.

    "restart" replaces the current process with a fresh interpreter running
    the same argv; "exit" sends SIGTERM to this process and then exits. Any
    other value does nothing and returns None.
    """
    if command == "exit":
        os.kill(os.getpid(), signal.SIGTERM)
        exit(0)
    if command == "restart":
        os.execl(g_config.python_exec, g_config.python_exec, *sys.argv)
|
||||
|
||||
|
||||
def handle_change(path, text, language):
    """Update the default reference audio settings.

    Only fields that are actually provided (not None and not "") are
    changed; the others keep their current value.

    Args:
        path: New reference audio path, or None/"" to keep the current one.
        text: New reference transcript, or None/"" to keep the current one.
        language: New reference language, or None/"" to keep the current one.

    Returns:
        A 400 JSONResponse when all three are missing, otherwise a 200
        JSONResponse with code 0.
    """
    if is_empty(path, text, language):
        return JSONResponse(
            {"code": 400, "message": 'missing any of the following parameters: "path", "text", "language"'},
            status_code=400,
        )

    # Both conditions must hold. The previous `!= "" or is not None` was true
    # for every value, so a missing field overwrote the stored default.
    if path is not None and path != "":
        default_refer.path = path
    if text is not None and text != "":
        default_refer.text = text
    if language is not None and language != "":
        default_refer.language = language

    logger.info(f"current default reference audio path: {default_refer.path}")
    logger.info(f"current default reference audio text: {default_refer.text}")
    logger.info(f"current default reference audio language: {default_refer.language}")
    logger.info(f"is_ready: {default_refer.is_ready()}")

    return JSONResponse({"code": 0, "message": "Success"}, status_code=200)
|
||||
|
||||
|
||||
def text_stream_generator(result):
    """Wrap a stream of byte chunks as text event-stream messages.

    Each chunk is base64-encoded and emitted as a "data: ..." message; a
    final "data: [DONE]" message ends the stream.

    Accepts a generator of bytes
    Returns a generator of string
    """
    for chunk in result:
        encoded = base64.b64encode(chunk)
        # NOTE(review): ``encoded`` is a bytes object, so the message contains
        # its repr (b'...'). Left as is because consumers may rely on it --
        # confirm before changing.
        yield f"data: {encoded}\n\n"
    yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc):
    """Validate a TTS request and return a streaming response with the audio.

    Args:
        refer_wav_path: Reference audio path; falls back to the default
            reference when any of the three reference fields is missing.
        prompt_text: Transcript of the reference audio.
        prompt_language: Language key of the reference audio.
        text: Text to synthesize.
        text_language: Language key of ``text``.
        cut_punc: Splitter argument passed to ``cut_text``; when None the
            configured default is used.

    Returns:
        A StreamingResponse (audio bytes, or a base64 text event stream when
        ``return_text_stream`` is set), or a 400 JSONResponse when the
        request is incomplete or names an unknown language.
    """
    if not is_full(refer_wav_path, prompt_text, prompt_language):
        refer_wav_path, prompt_text, prompt_language = (
            default_refer.path,
            default_refer.text,
            default_refer.language,
        )
        if not default_refer.is_ready():
            return JSONResponse({"code": 400, "message": "unspecified refer audio!"}, status_code=400)

    # Reject incomplete requests up front instead of failing inside the
    # response generator, where the client only sees a broken stream.
    if not isinstance(text, str) or text == "":
        return JSONResponse({"code": 400, "message": "missing parameter: text"}, status_code=400)
    if not isinstance(text_language, str) or text_language == "":
        return JSONResponse({"code": 400, "message": "missing parameter: text_language"}, status_code=400)
    for lang in (prompt_language, text_language):
        if not isinstance(lang, str) or lang.lower() not in dict_language:
            return JSONResponse({"code": 400, "message": f"unsupported language: {lang}"}, status_code=400)

    text = cut_text(text, default_cut_punc if cut_punc is None else cut_punc)

    audio_stream = get_tts_wav(refer_wav_path, prompt_text, prompt_language, text, text_language)
    if not return_text_stream:
        return StreamingResponse(audio_stream, media_type="audio/" + media_type)

    return StreamingResponse(text_stream_generator(audio_stream), media_type="text/event-stream")
|
||||
|
||||
|
||||
# --------------------------------
|
||||
# Initialization part
|
||||
# --------------------------------
|
||||
# Make the working directory and its GPT_SoVITS subdirectory importable.
now_dir = os.getcwd()
sys.path.append(now_dir)
sys.path.append("%s/GPT_SoVITS" % (now_dir))

# Accepted language keys mapped to the internal language codes; the internal
# codes map to themselves.
dict_language = {
    "中文": "all_zh",
    "英文": "en",
    "日文": "all_ja",
    "中英混合": "zh",
    "日英混合": "ja",
    "多语种混合": "auto",
    "all_zh": "all_zh",
    "en": "en",
    "all_ja": "all_ja",
    "zh": "zh",
    "ja": "ja",
    "auto": "auto",
}

# ``import logging`` alone does not load the ``logging.config`` submodule;
# import it explicitly instead of relying on another module having done so.
import logging.config  # noqa: E402

logging.config.dictConfig(uvicorn.config.LOGGING_CONFIG)
logger = logging.getLogger("uvicorn")

g_config = global_config.Config()
|
||||
|
||||
parser = argparse.ArgumentParser(description="GPT-SoVITS api")

# Command-line options as (flags, keyword arguments), registered in order.
# --bf16 selects torch.bfloat16 inference on Xeon CPU; --return_text_stream
# returns text/event-stream base64 encoded bytes to the frontend rather than
# audio bytes.
_cli_options = [
    (("-s", "--sovits_path"), dict(type=str, default=g_config.sovits_path, help="SoVITS model path")),
    (("-g", "--gpt_path"), dict(type=str, default=g_config.gpt_path, help="GPT model path")),
    (("-dr", "--default_refer_path"), dict(type=str, default="", help="default reference audio path")),
    (("-dt", "--default_refer_text"), dict(type=str, default="", help="default reference audio text")),
    (("-dl", "--default_refer_language"), dict(type=str, default="", help="default reference audio language")),
    (("-d", "--device"), dict(type=str, default=g_config.infer_device, help="cuda / cpu")),
    (("-a", "--bind_addr"), dict(type=str, default="0.0.0.0", help="default: 0.0.0.0")),
    (("-p", "--port"), dict(type=int, default=g_config.api_port, help="default: 9880")),
    (
        ("-fp", "--full_precision"),
        dict(action="store_true", default=False, help="overwrite config.is_half, use fp32"),
    ),
    (
        ("-hp", "--half_precision"),
        dict(action="store_true", default=False, help="overwrite config.is_half, use fp16"),
    ),
    (("-bf16", "--bf16"), dict(action="store_true", default=False, help="use bfloat16")),
    (
        ("-sm", "--stream_mode"),
        dict(type=str, default="close", help="streaming response, close / normal / keepalive"),
    ),
    (("-mt", "--media_type"), dict(type=str, default="wav", help="media type, wav / ogg / aac")),
    (("-cp", "--cut_punc"), dict(type=str, default="", help="text splitter, among ,.;?!、,。?!;:…")),
    (
        ("-hb", "--hubert_path"),
        dict(type=str, default=g_config.cnhubert_path, help="overwrite config.cnhubert_path"),
    ),
    (("-b", "--bert_path"), dict(type=str, default=g_config.bert_path, help="overwrite config.bert_path")),
    (
        ("-rts", "--return_text_stream"),
        dict(
            action="store_true",
            default=False,
            help="whether to return text/event-stream base64 encoded bytes to frontend",
        ),
    ),
]
for _flags, _kwargs in _cli_options:
    parser.add_argument(*_flags, **_kwargs)
|
||||
|
||||
# Parse the command line and copy the values into module-level settings.
args = parser.parse_args()
sovits_path = args.sovits_path
gpt_path = args.gpt_path
device = args.device
port = args.port
host = args.bind_addr
cnhubert_base_path = args.hubert_path
bert_path = args.bert_path
default_cut_punc = args.cut_punc
return_text_stream = args.return_text_stream

# Set default reference configuration
default_refer = DefaultRefer(args.default_refer_path, args.default_refer_text, args.default_refer_language)

# Check model paths: fall back to the pretrained checkpoints when unset.
# Logger.warning is used because Logger.warn is a deprecated alias.
if sovits_path == "":
    sovits_path = g_config.pretrained_sovits_path
    logger.warning(f"Unspecified SOVITS model path, fallback to current path: {sovits_path}")
if gpt_path == "":
    gpt_path = g_config.pretrained_gpt_path
    logger.warning(f"Unspecified GPT model path, fallback to current path: {gpt_path}")

# The default reference is all-or-nothing: if any field is missing, clear
# all three.
if default_refer.path == "" or default_refer.text == "" or default_refer.language == "":
    default_refer.path, default_refer.text, default_refer.language = "", "", ""
    logger.info("Unspecified default refer audio")
else:
    logger.info(f"default refer audio path: {default_refer.path}")
    logger.info(f"default refer audio text: {default_refer.text}")
    logger.info(f"default refer audio language: {default_refer.language}")
|
||||
|
||||
# deal with half precision
|
||||
if device == "cuda":
    # fp16 on GPU: a single precision flag overrides the configured default;
    # giving both flags (or neither) keeps the configured value.
    use_bf16 = False
    if args.full_precision == args.half_precision:
        is_half = g_config.is_half
    else:
        is_half = args.half_precision
    logger.info(f"fp16 half: {is_half}")
else:
    # Other devices never use fp16; bf16 is opt-in and full precision wins.
    is_half = False
    if args.full_precision:
        use_bf16 = False
    else:
        use_bf16 = True if args.bf16 else g_config.use_bf16

    logger.info(f"bf16 half: {use_bf16}")
|
||||
|
||||
# stream response mode
|
||||
_requested_stream = args.stream_mode.lower()
stream_mode = "normal" if _requested_stream in ["normal", "n"] else "close"
if stream_mode == "normal":
    logger.info("stream response mode enabled")

# media type: "aac" and "ogg" are honoured as requested; anything else
# becomes "wav" when not streaming and "ogg" when streaming.
_requested_media = args.media_type.lower()
if _requested_media in ["aac", "ogg"]:
    media_type = _requested_media
else:
    media_type = "wav" if stream_mode == "close" else "ogg"
logger.info(f"media type: {media_type}")
|
||||
|
||||
# Initialize the model
|
||||
# Load the tokenizer, the BERT model and the HuBERT model, move the models to
# the target device (in fp16 when enabled), then load the SoVITS and GPT
# weights.
cnhubert.cnhubert_base_path = cnhubert_base_path
tokenizer = AutoTokenizer.from_pretrained(bert_path)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
ssl_model = cnhubert.get_model()

if is_half:
    bert_model = bert_model.half()
    bert_model = bert_model.to(device)
    ssl_model = ssl_model.half()
    ssl_model = ssl_model.to(device)
else:
    bert_model = bert_model.to(device)
    ssl_model = ssl_model.to(device)

change_sovits_weights(sovits_path)
change_gpt_weights(gpt_path)
|
||||
|
||||
|
||||
# --------------------------------
|
||||
# APIs
|
||||
# --------------------------------
|
||||
app = FastAPI()

# CORS: any origin, method and header is accepted, with credentials allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
@app.post("/set_model")
async def set_model(request: Request):
    """Switch the GPT and SoVITS checkpoints at runtime.

    Expects a JSON body with "gpt_model_path" and "sovits_model_path".

    Returns:
        "ok" after both models are reloaded, or a 400 JSONResponse when
        either path is missing.
    """
    global gpt_path, sovits_path
    json_post_raw = await request.json()
    new_gpt_path = json_post_raw.get("gpt_model_path")
    new_sovits_path = json_post_raw.get("sovits_model_path")

    # Validate before touching the globals: a missing key used to overwrite
    # them with None and then fail on the string concatenation below.
    if not new_gpt_path or not new_sovits_path:
        return JSONResponse(
            {"code": 400, "message": 'missing parameter: "gpt_model_path" and "sovits_model_path" are required'},
            status_code=400,
        )

    gpt_path = new_gpt_path
    sovits_path = new_sovits_path
    logger.info("gptpath" + gpt_path + ";vitspath" + sovits_path)
    change_sovits_weights(sovits_path)
    change_gpt_weights(gpt_path)
    return "ok"
|
||||
|
||||
|
||||
@app.post("/control")
async def control_req(request: Request):
    """POST form of the control endpoint; the command comes from the JSON body's "command" key."""
    payload = await request.json()
    command = payload.get("command")
    return handle_control(command)
|
||||
|
||||
|
||||
@app.get("/control")
async def control(command: str = None):
    """GET form of the control endpoint; the command comes from the ``command`` query parameter."""
    outcome = handle_control(command)
    return outcome
|
||||
|
||||
|
||||
@app.post("/change_refer")
async def change_refer_req(request: Request):
    """POST form of the change-reference endpoint; fields are read from the JSON body."""
    payload = await request.json()
    field_names = ("refer_wav_path", "prompt_text", "prompt_language")
    return handle_change(*[payload.get(field) for field in field_names])
|
||||
|
||||
|
||||
@app.get("/change_refer")
async def change_refer(refer_wav_path: str = None, prompt_text: str = None, prompt_language: str = None):
    """GET form of the change-reference endpoint; fields are read from the query string."""
    outcome = handle_change(refer_wav_path, prompt_text, prompt_language)
    return outcome
|
||||
|
||||
|
||||
@app.post("/v1/audio/speech")
async def tts_endpoint_req(request: Request):
    """POST form of the speech endpoint; request fields are read from the JSON body."""
    payload = await request.json()
    # Field order matches the positional parameters of ``handle``.
    field_names = (
        "refer_wav_path",
        "prompt_text",
        "prompt_language",
        "text",
        "text_language",
        "cut_punc",
    )
    return handle(*[payload.get(field) for field in field_names])
|
||||
|
||||
|
||||
@app.get("/v1/audio/speech")
async def tts_endpoint(
    refer_wav_path: str = None,
    prompt_text: str = None,
    prompt_language: str = None,
    text: str = None,
    text_language: str = None,
    cut_punc: str = None,
):
    """GET form of the speech endpoint; request fields are read from the query string."""
    response = handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc)
    return response
|
||||
|
||||
|
||||
@app.post("/upload_as_default")
async def upload_audio(
    default_refer_file: UploadFile = File(...),
    default_refer_text: str = Form(...),
    default_refer_language: str = Form(...),
):
    """Upload a reference audio file and make it the default reference.

    Args:
        default_refer_file: The audio file; its name must end in .mp3 or .wav.
        default_refer_text: Transcript of the audio.
        default_refer_language: Language of the audio.

    Returns:
        The result of ``handle_change`` on success, or a 400 JSONResponse
        when a field is missing or the file name is not accepted.
    """
    # All three fields are required. The previous guard tested the file twice
    # and never the text.
    if not default_refer_file or not default_refer_text or not default_refer_language:
        return JSONResponse(
            {"code": 400, "message": "reference audio, text and language must be provided!"}, status_code=400
        )

    # Keep only the final path component so a client-supplied name cannot
    # point outside /tmp.
    name = os.path.basename(default_refer_file.filename or "")

    if name.endswith(".mp3") or name.endswith(".wav"):
        # temp file location
        tmp_file_location = f"/tmp/{name}"
        with open(tmp_file_location, "wb+") as f:
            f.write(default_refer_file.file.read())
        logger.info(f"reference audio saved at {tmp_file_location}!")
        return handle_change(path=tmp_file_location, text=default_refer_text, language=default_refer_language)
    else:
        return JSONResponse({"code": 400, "message": "audio name invalid!"}, status_code=400)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Serve the API with a single worker on the configured address and port.
    _server_options = {"host": host, "port": port, "workers": 1}
    uvicorn.run(app, **_server_options)
|
||||
@@ -1,38 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# SCRIPT USAGE NOTICE: By downloading and using any script file included
|
||||
# with the associated software package (such as files with .bat, .cmd, or
|
||||
# .JS extensions, Docker files, or any other type of file that, when executed,
|
||||
# automatically downloads and/or installs files onto your system) (the “Script File”),
|
||||
# it is your obligation to review the Script File to understand what files (e.g.,
|
||||
# other software, AI models, AI Datasets) the Script File will download to your system
|
||||
# (“Downloaded Files”). Furthermore, by downloading and using the Downloaded Files,
|
||||
# even if they are installed through a silent install, you agree to any and all
|
||||
# terms and conditions associated with such files, including but not limited to,
|
||||
# license terms, notices, or disclaimers.
|
||||
|
||||
FROM langchain/langchain:latest

# Install the system packages and delete the apt package lists in the same
# layer, so the downloaded index files are not baked into the image.
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev && \
    rm -rf /var/lib/apt/lists/*

# RUN useradd -m -s /bin/bash user && \
#     mkdir -p /home/user && \
#     chown -R user /home/user/

# USER user

COPY requirements.txt /tmp/requirements.txt

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /tmp/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/ws:/home/user:/home/user/qna-app/app

WORKDIR /home/user/qna-app
COPY qna-app /home/user/qna-app

# The container only sleeps; nothing is started by the image itself.
ENTRYPOINT ["/usr/bin/sleep", "infinity"]
|
||||
@@ -1,32 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
  # Redis Stack container; ports 6379 and 8001 are published on the host.
  redis-vector-db:
    container_name: redis-vector-db
    image: redis/redis-stack:7.2.0-v9
    ports:
      - "6379:6379"
      - "8001:8001"

  # Application container built from the local Dockerfile; uses the host network.
  qna-rag-redis-server:
    container_name: qna-rag-redis-server
    image: intel/gen-ai-examples:qna-rag-redis-server
    build:
      dockerfile: Dockerfile
      args:
        https_proxy: "${https_proxy}"
    network_mode: host
    environment:
      - "https_proxy=${https_proxy}"
      - "HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}"
      - "REDIS_PORT=6379"
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
      - "REDIS_SCHEMA=schema_dim_768.yml"
    ulimits:
      memlock:
        # -1 sets memlock to unlimited (no soft or hard limit).
        soft: -1
        hard: -1
    volumes:
      - "../redis:/ws"
      - "../test:/test"
|
||||
@@ -1,25 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim

# Install Poetry and have it install into the image's interpreter instead of
# creating a virtualenv.
RUN pip install poetry==1.6.1 && \
    poetry config virtualenvs.create false

WORKDIR /code

# Dependency metadata is copied before the application code.
COPY ./pyproject.toml ./README.md ./poetry.lock* ./

# NOTE(review): the `[s]` glob presumably lets this COPY succeed when
# ./packages is absent; confirm before simplifying.
COPY ./package[s] ./packages

# First pass: dependencies only (--no-root skips the project itself).
RUN poetry install --no-interaction --no-ansi --no-root

COPY ./app ./app

# Second pass: install the project now that ./app is present.
RUN poetry install --no-interaction --no-ansi

EXPOSE 8080

CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
@@ -1,79 +0,0 @@
|
||||
# my-app
|
||||
|
||||
## Installation
|
||||
|
||||
Install the LangChain CLI if you haven't yet
|
||||
|
||||
```bash
|
||||
pip install -U langchain-cli
|
||||
```
|
||||
|
||||
## Adding packages
|
||||
|
||||
```bash
|
||||
# adding packages from
|
||||
# https://github.com/langchain-ai/langchain/tree/master/templates
|
||||
langchain app add $PROJECT_NAME
|
||||
|
||||
# adding custom GitHub repo packages
|
||||
langchain app add --repo $OWNER/$REPO
|
||||
# or with whole git string (supports other git providers):
|
||||
# langchain app add git+https://github.com/hwchase17/chain-of-verification
|
||||
|
||||
# with a custom api mount point (defaults to `/{package_name}`)
|
||||
langchain app add $PROJECT_NAME --api_path=/my/custom/path/rag
|
||||
```
|
||||
|
||||
Note: packages are removed by their API path:
|
||||
|
||||
```bash
|
||||
langchain app remove my/custom/path/rag
|
||||
```
|
||||
|
||||
## Setup LangSmith (Optional)
|
||||
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).
|
||||
If you don't have access, you can skip this section.
|
||||
|
||||
```shell
|
||||
export LANGCHAIN_TRACING_V2=true
|
||||
export LANGCHAIN_API_KEY=<your-api-key>
|
||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
||||
```
|
||||
|
||||
## Launch LangServe
|
||||
|
||||
```bash
|
||||
langchain serve
|
||||
```
|
||||
|
||||
## Running in Docker
|
||||
|
||||
This project folder includes a Dockerfile that allows you to easily build and host your LangServe app.
|
||||
|
||||
### Building the Image
|
||||
|
||||
To build the image, simply run:
|
||||
|
||||
```shell
|
||||
docker build . -t my-langserve-app
|
||||
```
|
||||
|
||||
If you tag your image with something other than `my-langserve-app`,
|
||||
note it for use in the next step.
|
||||
|
||||
### Running the Image Locally
|
||||
|
||||
To run the image, you'll need to include any environment variables
|
||||
necessary for your application.
|
||||
|
||||
In the below example, we inject the `OPENAI_API_KEY` environment
|
||||
variable with the value set in my local environment
|
||||
(`$OPENAI_API_KEY`)
|
||||
|
||||
We also expose port 8080 with the `-p 8080:8080` option.
|
||||
|
||||
```shell
|
||||
docker run -e OPENAI_API_KEY=$OPENAI_API_KEY -p 8080:8080 my-langserve-app
|
||||
```
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
@@ -1,90 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
unsafe_categories = """O1: Violence and Hate.
|
||||
Should not
|
||||
- Help people plan or engage in violence.
|
||||
- Encourage people to engage in violence.
|
||||
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.
|
||||
- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
|
||||
Can
|
||||
- Provide information on violence and discrimination.
|
||||
- Discuss the topics of hate and violence, and can discuss historical events involving violence.
|
||||
O2: Sexual Content.
|
||||
Should not
|
||||
- Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
|
||||
Can
|
||||
- Discuss topics related to sexuality and sexual education.
|
||||
- Provide general information regarding sexual activities.
|
||||
- Provide guidance on topics related to sexual health.
|
||||
O3: Criminal Planning.
|
||||
Should not
|
||||
- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
|
||||
- Encourage people to engage in criminal activities or condone criminal behavior.
|
||||
Can
|
||||
- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.
|
||||
- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.
|
||||
O4: Guns and Illegal Weapons.
|
||||
Should not
|
||||
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
|
||||
- Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
|
||||
Can
|
||||
- Discuss firearms and the arguments for and against firearm ownership.
|
||||
O5: Regulated or Controlled Substances.
|
||||
Should not
|
||||
- Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
|
||||
- Assist or encourage people to create such substances when it is illegal to do so.
|
||||
Can
|
||||
- Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are).
|
||||
- Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation.
|
||||
O6: Self-Harm.
|
||||
Should not
|
||||
- Encourage people to harm themselves.
|
||||
- Romanticize or condone self-harm or suicide.
|
||||
- Provide information regarding the methods of suicide or self-harm.
|
||||
- Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
|
||||
Can
|
||||
- Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions).
|
||||
Should
|
||||
- Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves."""
|
||||
|
||||
# Maps the category codes used in ``unsafe_categories`` (O1..O6) to their titles.
unsafe_dict = dict(
    [
        ("O1", "Violence and Hate"),
        ("O2", "Sexual Content"),
        ("O3", "Criminal Planning"),
        ("O4", "Guns and Illegal Weapons"),
        ("O5", "Regulated or Controlled Substances"),
        ("O6", "Self-Harm"),
    ]
)
|
||||
|
||||
|
||||
def moderation_prompt_for_chat(chat):
    """Build the moderation prompt for a conversation.

    Args:
        chat: Sequence of turns, each a mapping with a ``"content"`` entry.
            Turns are assumed to alternate User, Agent, User, ... starting
            with the User; no other key of a turn is read and no validation
            is performed.

    Returns:
        The complete ``[INST] ... [/INST]`` prompt string. The role being
        assessed is "Agent" when the number of turns is even and "User" when
        it is odd (for a non-empty chat, the speaker of the last turn).
    """
    conversation = [turn["content"] for turn in chat]
    # Role under assessment; kept separate from the per-turn speaker label below.
    role = "Agent" if len(conversation) % 2 == 0 else "User"
    prompt = f"""[INST] Task: Check if there is unsafe content in '{role}' messages in conversations according our safety policy with the below categories.

<BEGIN UNSAFE CONTENT CATEGORIES>
{unsafe_categories}
<END UNSAFE CONTENT CATEGORIES>

<BEGIN CONVERSATION>

"""

    # Alternate User/Agent turns, inserting 2 newlines between each
    for i, m in enumerate(conversation):
        speaker = "User" if i % 2 == 0 else "Agent"
        prompt += f"{speaker}: {m}\n\n"
    prompt += f"""<END CONVERSATION>

Provide your safety assessment for {role} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
    return prompt
|
||||
@@ -1,54 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
|
||||
# ========= Raw Q&A template prompt =========
# The instruction sentences are joined into one line with backslash
# continuations. The last sentence ends with a real newline so that
# "### User:" starts on its own line instead of being glued to "concise.".
template = """### System:\n\n
You are an assistant chatbot. You answer questions. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.
### User:\n{question}\n### Assistant:\n"""
prompt = ChatPromptTemplate.from_template(template)
|
||||
|
||||
|
||||
# ========= contextualize prompt =========
# System instruction: rewrite the latest question so it stands alone, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
# Message order: system instruction, prior turns ("chat_history"), then the new question.
_contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(_contextualize_messages)
|
||||
|
||||
|
||||
# ========= Q&A with history prompt =========
# qa_system_prompt = """You are an assistant for question-answering tasks. \
# Use the following pieces of retrieved context to answer the question. \
# If you don't know the answer, just say that you don't know. \
# Use three sentences maximum and keep the answer concise.\

# {context}"""
# qa_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", qa_system_prompt),
#         MessagesPlaceholder(variable_name="chat_history"),
#         ("human", "{question}"),
#     ]
# )

# Active QA template. The instruction sentences are joined with backslash
# continuations; the last one ends with a real newline so the retrieved
# {context} starts on its own line instead of being glued to "concise.".
template = """### System:\n\n
You are an assistant chatbot. You answer questions. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.
{context}
### User:\n{question}\n### Assistant:\n"""
qa_prompt = ChatPromptTemplate.from_template(template)
|
||||
@@ -1,322 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from fastapi import APIRouter, FastAPI, File, Request, UploadFile
|
||||
from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
|
||||
from guardrails import moderation_prompt_for_chat, unsafe_dict
|
||||
from langchain.globals import set_debug, set_verbose
|
||||
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
|
||||
from langchain_community.llms import HuggingFaceEndpoint
|
||||
from langchain_community.vectorstores import Redis
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from langserve import add_routes
|
||||
from prompts import contextualize_q_prompt, prompt, qa_prompt
|
||||
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
from utils import (
|
||||
create_kb_folder,
|
||||
create_retriever_from_files,
|
||||
create_retriever_from_links,
|
||||
get_current_beijing_time,
|
||||
post_process_text,
|
||||
reload_retriever,
|
||||
)
|
||||
|
||||
# Enable LangChain's verbose and debug output for the whole process.
set_debug(True)
set_verbose(True)

app = FastAPI()

# NOTE(review): CORS accepts any origin, method and header with credentials
# enabled; confirm this is intended before exposing the service.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
|
||||
|
||||
|
||||
class RAGAPIRouter(APIRouter):
    """Router that owns the LLM clients, embeddings, RAG chain and chat history.

    Attributes set by ``__init__``:
        upload_dir, entrypoint, safety_guard_endpoint: the constructor arguments.
        llm: ``HuggingFaceEndpoint`` pointed at ``entrypoint`` (streaming).
        llm_guard: second ``HuggingFaceEndpoint``; only created when
            ``safety_guard_endpoint`` is truthy.
        embeddings: TEI-backed embeddings when ``tei_endpoint`` is given,
            otherwise a local model loaded from ``EMBED_MODEL``.
        contextualize_q_chain, llm_chain: the runnable chains.
        chat_history: list shared by every request served by this router.
    """

    def __init__(self, upload_dir, entrypoint, safety_guard_endpoint, tei_endpoint=None) -> None:
        super().__init__()
        self.upload_dir = upload_dir
        self.entrypoint = entrypoint
        self.safety_guard_endpoint = safety_guard_endpoint
        print(
            f"[rag - router] Initializing API Router, params:\n \
                    upload_dir={upload_dir}, entrypoint={entrypoint}"
        )

        # Define LLM
        self.llm = HuggingFaceEndpoint(
            endpoint_url=entrypoint,
            max_new_tokens=1024,
            top_k=10,
            top_p=0.95,
            typical_p=0.95,
            temperature=0.01,
            repetition_penalty=1.03,
            streaming=True,
        )
        # for NeuralChatEndpoint:
        # self.llm = NeuralChatEndpoint(
        #     endpoint_url=entrypoint,
        #     max_new_tokens=1024,
        #     top_k=10,
        #     top_p=0.95,
        #     typical_p=0.95,
        #     temperature=0.01,
        #     repetition_penalty=1.03,
        #     streaming=True,
        # )
        if self.safety_guard_endpoint:
            self.llm_guard = HuggingFaceEndpoint(
                endpoint_url=safety_guard_endpoint,
                max_new_tokens=100,
                top_k=1,
                top_p=0.95,
                typical_p=0.95,
                temperature=0.01,
                repetition_penalty=1.03,
            )
        print("[rag - router] LLM initialized.")

        # Define LLM Chain
        if tei_endpoint:
            # create embeddings using TEI endpoint service
            self.embeddings = HuggingFaceHubEmbeddings(model=tei_endpoint)
        else:
            # create embeddings using local embedding model
            self.embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

        try:
            rds = Redis.from_existing_index(
                self.embeddings,
                index_name=INDEX_NAME,
                redis_url=REDIS_URL,
                schema=INDEX_SCHEMA,
            )
            retriever = rds.as_retriever(search_type="mmr")
        except Exception as e:
            print(
                "[rag - chat] Initializing Redis RAG failure, will skip RAG and fallback to normal chat in the chain!"
            )
            # Surface the cause instead of silently discarding it.
            print(f"[rag - chat] cause: {e}")
            retriever = None
        # Define contextualize chain
        # self.contextualize_q_chain = contextualize_q_prompt | self.llm | StrOutputParser()
        self.contextualize_q_chain = prompt | self.llm | StrOutputParser()

        # Define LLM chain
        if retriever:
            self.llm_chain = (
                RunnablePassthrough.assign(context=self.contextualized_question | retriever) | qa_prompt | self.llm
            )
        else:
            self.llm_chain = RunnablePassthrough.assign(context=self.contextualized_question) | prompt | self.llm
        print("[rag - router] LLM chain initialized.")

        # Define chat history
        self.chat_history = []

    def contextualized_question(self, input: dict):
        """Return the contextualize chain when history exists, else the raw question."""
        if input.get("chat_history"):
            return self.contextualize_q_chain
        else:
            return input["question"]

    @staticmethod
    def _violated_policies(guard_output):
        """Turn the guard model's reply into a readable list of policy names.

        The reply is expected to carry a comma-separated list of category
        codes on its second line. Unknown codes are passed through as-is, and
        a reply without a second line yields a generic label rather than an
        ``IndexError``/``KeyError``.
        """
        lines = guard_output.strip().split("\n")
        codes = [code.strip() for code in lines[1].split(",")] if len(lines) > 1 else []
        names = [unsafe_dict.get(code, code) for code in codes if code]
        return ", ".join(names) if names else "Unspecified policies"

    def handle_rag_chat(self, query: str):
        """Run one non-streaming chat turn and return the answer text.

        The raw chain output is recorded in ``chat_history``; the returned
        text is the part before the first ``</s>`` with leading whitespace
        removed. When a safety guard is configured and flags the exchange,
        a message naming the violated policies is returned instead.
        """
        response = self.llm_chain.invoke({"question": query, "chat_history": self.chat_history})
        # response = self.llm_chain.invoke({"question": query})
        result = response.split("</s>")[0]
        self.chat_history.extend([HumanMessage(content=query), response])
        # output guardrails
        if self.safety_guard_endpoint:
            # moderation_prompt_for_chat takes ONE argument: a list of turns with
            # a "content" entry. Two turns make it assess the Agent reply.
            guard_chat = [
                {"role": "user", "content": query},
                {"role": "assistant", "content": response},
            ]
            response_output_guard = self.llm_guard(moderation_prompt_for_chat(guard_chat))
            if "unsafe" in response_output_guard:
                policy_violations = self._violated_policies(response_output_guard)
                print(f"Violated policies: {policy_violations}")
                return policy_violations + " are found in the output"
        return result.lstrip()
|
||||
|
||||
|
||||
# Runtime configuration is read from environment variables. The two lookups
# without a fallback yield None when the variable is unset.
upload_dir = os.environ.get("RAG_UPLOAD_DIR", "./upload_dir")
tgi_llm_endpoint = os.environ.get("TGI_LLM_ENDPOINT", "http://localhost:8080")
safety_guard_endpoint = os.environ.get("SAFETY_GUARD_ENDPOINT")
tei_embedding_endpoint = os.environ.get("TEI_ENDPOINT")

# Single router instance used by the endpoint functions of this module.
router = RAGAPIRouter(
    upload_dir=upload_dir,
    entrypoint=tgi_llm_endpoint,
    safety_guard_endpoint=safety_guard_endpoint,
    tei_endpoint=tei_embedding_endpoint,
)
|
||||
|
||||
|
||||
@router.post("/v1/rag/chat")
async def rag_chat(request: Request):
    """Answer one chat query through the RAG chain (non-streaming).

    The JSON body must contain ``query``; ``knowledge_base_id`` is optional
    and defaults to ``"default"``. Ids other than ``"default"`` must start
    with ``"kb"``, otherwise a 400 response is returned.

    When a safety guard endpoint is configured the query is screened first
    and a "Violated policies" message is returned if it is flagged.

    NOTE(review): ``router.llm_chain`` and ``router.chat_history`` are shared
    by all requests and are reassigned/extended here.
    """
    params = await request.json()
    print(f"[rag - chat] POST request: /v1/rag/chat, params:{params}")
    query = params["query"]
    kb_id = params.get("knowledge_base_id", "default")
    print(f"[rag - chat] history: {router.chat_history}")

    # prompt guardrails
    if router.safety_guard_endpoint:
        # moderation_prompt_for_chat takes ONE argument: a list of turns with a
        # "content" entry. A single turn makes it assess the User message.
        guard_chat = [{"role": "user", "content": query}]
        response_input_guard = router.llm_guard(moderation_prompt_for_chat(guard_chat))
        if "unsafe" in response_input_guard:
            # The second line holds a comma-separated list of category codes;
            # tolerate a missing line and unknown codes instead of raising.
            guard_lines = response_input_guard.strip().split("\n")
            codes = [code.strip() for code in guard_lines[1].split(",")] if len(guard_lines) > 1 else []
            policy_violations = ", ".join(unsafe_dict.get(code, code) for code in codes if code)
            policy_violations = policy_violations or "Unspecified policies"
            print(f"Violated policies: {policy_violations}")
            return f"Violated policies: {policy_violations}, please check your input."

    if kb_id == "default":
        print("[rag - chat] use default knowledge base")
        new_index_name = INDEX_NAME
    elif kb_id.startswith("kb"):
        new_index_name = INDEX_NAME + kb_id
        print(f"[rag - chat] use knowledge base {kb_id}, index name is {new_index_name}")
    else:
        return JSONResponse(status_code=400, content={"message": "Wrong knowledge base id."})

    try:
        retriever = reload_retriever(router.embeddings, new_index_name)
        router.llm_chain = (
            RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
        )
    except Exception as e:
        # On failure the previously configured chain stays in place.
        print("[rag - chat] Initializing Redis RAG failure, will skip RAG and fallback to normal chat in the chain!")
        print(f"[rag - chat] cause: {e}")
    return router.handle_rag_chat(query=query)
|
||||
|
||||
|
||||
@router.post("/v1/rag/chat_stream")
async def rag_chat_stream(request: Request):
    """Answer one chat query through the RAG chain as a ``text/event-stream``.

    The JSON body must contain ``query``; ``knowledge_base_id`` is optional
    and defaults to ``"default"``. Ids other than ``"default"`` must start
    with ``"kb"``, otherwise a 400 response is returned.

    When a safety guard endpoint is configured the query is screened first;
    a flagged query produces a short stream carrying the violation message.

    NOTE(review): ``router.llm_chain`` and ``router.chat_history`` are shared
    by all requests and are reassigned/extended here.
    """
    params = await request.json()
    print(f"[rag - chat_stream] POST request: /v1/rag/chat_stream, params:{params}")
    query = params["query"]
    kb_id = params.get("knowledge_base_id", "default")
    print(f"[rag - chat_stream] history: {router.chat_history}")

    # prompt guardrails
    if router.safety_guard_endpoint:
        # moderation_prompt_for_chat takes ONE argument: a list of turns with a
        # "content" entry. A single turn makes it assess the User message.
        guard_chat = [{"role": "user", "content": query}]
        response_input_guard = router.llm_guard(moderation_prompt_for_chat(guard_chat))
        if "unsafe" in response_input_guard:
            # The second line holds a comma-separated list of category codes;
            # tolerate a missing line and unknown codes instead of raising.
            guard_lines = response_input_guard.strip().split("\n")
            codes = [code.strip() for code in guard_lines[1].split(",")] if len(guard_lines) > 1 else []
            policy_violations = ", ".join(unsafe_dict.get(code, code) for code in codes if code)
            policy_violations = policy_violations or "Unspecified policies"
            print(f"Violated policies: {policy_violations}")

            def generate_content():
                content = f"Violated policies: {policy_violations}, please check your input."
                yield f"data: {content}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(generate_content(), media_type="text/event-stream")

    if kb_id == "default":
        print("[rag - chat] use default knowledge base")
        new_index_name = INDEX_NAME
    elif kb_id.startswith("kb"):
        new_index_name = INDEX_NAME + kb_id
        print(f"[rag - chat] use knowledge base {kb_id}, index name is {new_index_name}")
    else:
        return JSONResponse(status_code=400, content={"message": "Wrong knowledge base id."})

    try:
        retriever = reload_retriever(router.embeddings, new_index_name)
        router.llm_chain = (
            RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
        )
    except Exception as e:
        # On failure the previously configured chain stays in place.
        print("[rag - chat] Initializing Redis RAG failure, will skip RAG and fallback to normal chat in the chain!")
        print(f"[rag - chat] cause: {e}")

    def stream_generator():
        chat_response = ""
        for text in router.llm_chain.stream({"question": query, "chat_history": router.chat_history}):
            # for text in router.llm_chain.stream({"question": query}):
            # Skip empty chunks before concatenating; the original checked for
            # None only after `chat_response += text` had already used it.
            if text is None:
                continue
            chat_response += text
            yield post_process_text(text)
        chat_response = chat_response.split("</s>")[0]
        print(f"[rag - chat_stream] stream response: {chat_response}")
        router.chat_history.extend([HumanMessage(content=query), chat_response])
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream_generator(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@router.post("/v1/rag/create")
async def rag_create(file: UploadFile = File(...)):
    """Create a knowledge base from one uploaded file.

    The upload is written into a freshly created knowledge-base folder, a
    retriever is built from it, and ``router.llm_chain`` is replaced with a
    chain using that retriever.

    Returns:
        ``{"knowledge_base_id": kb_id}`` on success, or a 500 ``JSONResponse``
        when building the retriever fails.
    """
    filename = file.filename
    if "/" in filename:
        # keep only the part after the last slash
        filename = filename.rsplit("/", 1)[-1]
    print(f"[rag - create] POST request: /v1/rag/create, filename:{filename}")

    kb_id, user_upload_dir, _user_persist_dir = create_kb_folder(router.upload_dir)
    # save file to local path, prefixed with the current timestamp
    timestamp = get_current_beijing_time()
    save_file_name = f"{user_upload_dir}/{timestamp}-{filename}"
    with open(save_file_name, "wb") as fout:
        fout.write(await file.read())
    print(f"[rag - create] file saved to local path: {save_file_name}")

    # create new retriever
    try:
        # get retrieval instance and reload db with new knowledge base
        print("[rag - create] starting to create local db...")
        retriever = create_retriever_from_files(save_file_name, router.embeddings, INDEX_NAME + kb_id)
        context_step = RunnablePassthrough.assign(context=router.contextualized_question | retriever)
        router.llm_chain = context_step | qa_prompt | router.llm
        print("[rag - create] kb created successfully")
    except Exception as e:
        print(f"[rag - create] create knowledge base failed! {e}")
        return JSONResponse(status_code=500, content={"message": "Fail to create new knowledge base."})
    return {"knowledge_base_id": kb_id}
|
||||
|
||||
|
||||
@router.post("/v1/rag/upload_link")
async def rag_upload_link(request: Request):
    """Create a knowledge base from the links in the request body.

    The JSON body must contain ``link_list``. A new knowledge-base folder is
    created, a retriever is built from the links, and ``router.llm_chain`` is
    replaced with a chain using that retriever.

    Returns:
        ``{"knowledge_base_id": kb_id}`` on success, or a 500 ``JSONResponse``
        when building the retriever fails.
    """
    body = await request.json()
    link_list = body["link_list"]
    print(f"[rag - upload_link] POST request: /v1/rag/upload_link, link list:{link_list}")

    kb_id, _user_upload_dir, _user_persist_dir = create_kb_folder(router.upload_dir)

    # create new retriever
    try:
        print("[rag - upload_link] starting to create local db...")
        retriever = create_retriever_from_links(router.embeddings, link_list, INDEX_NAME + kb_id)
        context_step = RunnablePassthrough.assign(context=router.contextualized_question | retriever)
        router.llm_chain = context_step | qa_prompt | router.llm
        print("[rag - upload_link] kb created successfully")
    except Exception as e:
        print(f"[rag - upload_link] create knowledge base failed! {e}")
        return JSONResponse(status_code=500, content={"message": "Fail to create new knowledge base."})
    return {"knowledge_base_id": kb_id}
|
||||
|
||||
|
||||
# Register the /v1/rag/* endpoints declared on `router` with the application.
app.include_router(router)
|
||||
|
||||
|
||||
@app.get("/")
async def redirect_root_to_docs():
    """Redirect requests for the site root to ``/docs``."""
    docs_path = "/docs"
    return RedirectResponse(docs_path)
|
||||
|
||||
|
||||
# Expose the chain held by `router.llm_chain` at import time via LangServe under /rag-redis.
add_routes(app, router.llm_chain, path="/rag-redis")
|
||||
|
||||
if __name__ == "__main__":
    # uvicorn is imported only when the module is run as a script.
    import uvicorn

    # Listen on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
@@ -1,342 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.document_loaders import UnstructuredFileLoader
|
||||
from langchain_community.vectorstores import Redis
|
||||
from langchain_core.documents import Document
|
||||
from rag_redis.config import INDEX_SCHEMA, REDIS_URL
|
||||
|
||||
|
||||
def get_current_beijing_time():
    """Return the current time at UTC+8 formatted as ``YYYY-MM-DD-HH:MM:SS``."""
    SHA_TZ = timezone(timedelta(hours=8), name="Asia/Shanghai")
    # datetime.utcnow() is deprecated (Python 3.12) and returns a naive value;
    # asking for "now" in the target zone gives the same instant in one step.
    return datetime.now(SHA_TZ).strftime("%Y-%m-%d-%H:%M:%S")
|
||||
|
||||
|
||||
def create_kb_folder(upload_dir):
    """Create the folder tree for a new knowledge base under ``upload_dir``.

    A new id of the form ``kb_`` plus the first eight characters of a
    ``uuid1`` is generated, and ``<upload_dir>/<kb_id>/upload_dir`` and
    ``<upload_dir>/<kb_id>/persist_dir`` are created (existing folders are
    left alone).

    Returns:
        Tuple ``(kb_id, upload_path, persist_path)`` with both paths as ``str``.
    """
    kb_id = f"kb_{str(uuid.uuid1())[:8]}"
    kb_root = Path(upload_dir) / kb_id

    # create local folder for retrieval
    os.makedirs(upload_dir, exist_ok=True)
    kb_root.mkdir(parents=True, exist_ok=True)
    user_upload_dir, user_persist_dir = (kb_root / leaf for leaf in ("upload_dir", "persist_dir"))
    for folder in (user_upload_dir, user_persist_dir):
        folder.mkdir(parents=True, exist_ok=True)
    print(f"[rag - create kb folder] upload path: {user_upload_dir}, persist path: {user_persist_dir}")
    return kb_id, str(user_upload_dir), str(user_persist_dir)
|
||||
|
||||
|
||||
class Crawler:
|
||||
|
||||
def __init__(self, pool=None):
|
||||
if pool:
|
||||
assert isinstance(pool, (str, list, tuple)), "url pool should be str, list or tuple"
|
||||
self.pool = pool
|
||||
self.headers = {
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng, \
|
||||
*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, \
|
||||
like Gecko) Chrome/113.0.0.0 Safari/537.36",
|
||||
}
|
||||
self.fetched_pool = set()
|
||||
|
||||
def get_sublinks(self, soup):
|
||||
sublinks = []
|
||||
for links in soup.find_all("a"):
|
||||
sublinks.append(str(links.get("href")))
|
||||
return sublinks
|
||||
|
||||
def get_hyperlink(self, soup, base_url):
|
||||
sublinks = []
|
||||
for links in soup.find_all("a"):
|
||||
link = str(links.get("href"))
|
||||
if link.startswith("#") or link is None or link == "None":
|
||||
continue
|
||||
suffix = link.split("/")[-1]
|
||||
if "." in suffix and suffix.split(".")[-1] not in ["html", "htmld"]:
|
||||
continue
|
||||
link_parse = urlparse(link)
|
||||
base_url_parse = urlparse(base_url)
|
||||
if link_parse.path == "":
|
||||
continue
|
||||
if link_parse.netloc != "":
|
||||
# keep crawler works in the same domain
|
||||
if link_parse.netloc != base_url_parse.netloc:
|
||||
continue
|
||||
sublinks.append(link)
|
||||
else:
|
||||
sublinks.append(
|
||||
urlunparse(
|
||||
(
|
||||
base_url_parse.scheme,
|
||||
base_url_parse.netloc,
|
||||
link_parse.path,
|
||||
link_parse.params,
|
||||
link_parse.query,
|
||||
link_parse.fragment,
|
||||
)
|
||||
)
|
||||
)
|
||||
return sublinks
|
||||
|
||||
def fetch(self, url, headers=None, max_times=5):
|
||||
if not headers:
|
||||
headers = self.headers
|
||||
while max_times:
|
||||
if not url.startswith("http") or not url.startswith("https"):
|
||||
url = "http://" + url
|
||||
print("start fetch %s...", url)
|
||||
try:
|
||||
response = requests.get(url, headers=headers, verify=True)
|
||||
if response.status_code != 200:
|
||||
print("fail to fetch %s, response status code: %s", url, response.status_code)
|
||||
else:
|
||||
return response
|
||||
except Exception as e:
|
||||
print("fail to fetch %s, caused by %s", url, e)
|
||||
raise Exception(e)
|
||||
max_times -= 1
|
||||
return None
|
||||
|
||||
def process_work(self, sub_url, work):
|
||||
response = self.fetch(sub_url)
|
||||
if response is None:
|
||||
return []
|
||||
self.fetched_pool.add(sub_url)
|
||||
soup = self.parse(response.text)
|
||||
base_url = self.get_base_url(sub_url)
|
||||
sublinks = self.get_hyperlink(soup, base_url)
|
||||
if work:
|
||||
work(sub_url, soup)
|
||||
return sublinks
|
||||
|
||||
def crawl(self, pool, work=None, max_depth=10, workers=10):
|
||||
url_pool = set()
|
||||
for url in pool:
|
||||
base_url = self.get_base_url(url)
|
||||
response = self.fetch(url)
|
||||
soup = self.parse(response.text)
|
||||
sublinks = self.get_hyperlink(soup, base_url)
|
||||
self.fetched_pool.add(url)
|
||||
url_pool.update(sublinks)
|
||||
depth = 0
|
||||
while len(url_pool) > 0 and depth < max_depth:
|
||||
print("current depth %s...", depth)
|
||||
mp = multiprocessing.Pool(processes=workers)
|
||||
results = []
|
||||
for sub_url in url_pool:
|
||||
if sub_url not in self.fetched_pool:
|
||||
results.append(mp.apply_async(self.process_work, (sub_url, work)))
|
||||
mp.close()
|
||||
mp.join()
|
||||
url_pool = set()
|
||||
for result in results:
|
||||
sublinks = result.get()
|
||||
url_pool.update(sublinks)
|
||||
depth += 1
|
||||
|
||||
def parse(self, html_doc):
    """Return a BeautifulSoup tree built from ``html_doc`` with the "lxml" parser."""
    return BeautifulSoup(html_doc, "lxml")
|
||||
|
||||
def download(self, url, file_name):
    """Stream ``url`` into the local file ``file_name``.

    Args:
        url: Address to download, requested with ``self.headers``.
        file_name: Destination path, opened in binary write mode.

    Returns:
        None. Any exception is reported on stdout and swallowed
        (best-effort download).
    """
    print(f"download {url} into {file_name}...")
    try:
        r = requests.get(url, stream=True, headers=self.headers, verify=True)
        # The context manager closes (and flushes) the file even when a chunk
        # fails mid-transfer; the handle used to be left open.
        with open(file_name, "wb") as f:
            for chunk in r.iter_content(chunk_size=512):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    except Exception as e:
        print(f"fail to download {url}, caused by {e}")
|
||||
|
||||
def get_base_url(self, url):
    """Reduce ``url`` to ``scheme://netloc``, dropping path, params, query and fragment."""
    parts = urlparse(url)
    scheme, netloc = parts.scheme, parts.netloc
    return urlunparse((scheme, netloc, "", "", "", ""))
|
||||
|
||||
def clean_text(self, text):
    """Collapse repeated spaces and newlines and drop blank lines.

    The text is stripped, carriage returns become newlines, runs of spaces
    and of newlines are squeezed to one character, and lines that are empty
    or a single space are removed.
    """
    normalized = text.strip().replace("\r", "\n")
    normalized = re.sub("\n+", "\n", re.sub(" +", " ", normalized))
    kept = []
    for line in normalized.split("\n"):
        if line and line != " ":
            kept.append(line)
    return "\n".join(kept)
|
||||
|
||||
|
||||
def uni_pro(text):
    """Return ``text`` NFKD-normalized, keeping only ASCII characters and non-spacing marks."""
    return "".join(
        char
        for char in unicodedata.normalize("NFKD", text)
        if ord(char) < 128 or unicodedata.category(char) == "Mn"
    )
|
||||
|
||||
|
||||
def load_html_data(url):
    """Fetch ``url`` and return the text of its "main"/"container" elements.

    Elements are looked up by class first and, when no class matches, by id,
    for the names ``main`` and ``container``. Their cleaned text is
    concatenated (skipping text already present), newlines are removed, the
    result is reduced to ASCII plus non-spacing marks by ``uni_pro`` and
    whitespace runs are collapsed to a single space.

    Args:
        url: Page address.

    Returns:
        The extracted text (possibly empty), or ``None`` when the page could
        not be fetched.
    """
    crawler = Crawler()
    res = crawler.fetch(url)
    if res is None:
        return None
    soup = crawler.parse(res.text)
    # The whole-body text used to be computed here but was never used, and it
    # raised on documents without a <body>; it has been removed.
    main_content = ""
    for element_name in ["main", "container"]:
        # Class selector wins; fall back to the id selector when it is empty.
        main_block = soup.select(f".{element_name}") or soup.select(f"#{element_name}")
        if main_block:
            for element in main_block:
                text = crawler.clean_text(element.text)
                if text not in main_content:
                    main_content += f"\n{text}"
            main_content = crawler.clean_text(main_content)

    # A second replace of "\n\n" used to follow; it could never match once
    # every "\n" was gone, so it was dropped.
    main_content = main_content.replace("\n", "")
    main_content = uni_pro(main_content)
    main_content = re.sub(r"\s+", " ", main_content)

    return main_content
|
||||
|
||||
|
||||
def get_chuck_data(content, max_length, min_length, input):
    """Split ``content`` into paragraphs of at most ``max_length`` characters.

    The text is cut after each ``!``, ``.`` or ``?`` and consecutive
    sentences are packed greedily into one paragraph while they fit.

    Args:
        content: Text to split.
        max_length: Maximum paragraph length, measured before stripping. A
            single sentence longer than this becomes its own paragraph.
        min_length: The final paragraph is kept only when its stripped length
            is greater than this value.
        input: Source tag stored next to every paragraph (the name shadows
            the builtin but is kept for caller compatibility).

    Returns:
        A list of ``[paragraph, input]`` pairs.
    """
    sentences = re.split("(?<=[!.?])", content)

    paragraphs = []
    current_paragraph = ""
    for sub_sen in sentences:
        if len(current_paragraph) + len(sub_sen) <= max_length:
            current_paragraph += sub_sen
            continue
        # The sentence does not fit: close the running paragraph. Nothing is
        # emitted when it is still empty (first sentence already too long),
        # which used to yield an empty paragraph.
        if current_paragraph.strip():
            paragraphs.append([current_paragraph.strip(), input])
        current_paragraph = sub_sen

    # Flush the remainder after the loop so trailing text is also kept when
    # the last fragment was the one that overflowed; it used to be dropped.
    if len(current_paragraph.strip()) > min_length:
        paragraphs.append([current_paragraph.strip(), input])

    return paragraphs
|
||||
|
||||
|
||||
def parse_html(input):
    """Download every http(s) link in ``input`` and pair its text with the link.

    Args:
        input: Iterable of strings. Entries that do not look like an
            ``http://`` or ``https://`` URL are reported on stdout and skipped.

    Returns:
        A list of ``[stripped_text, link]`` pairs, one for each link whose
        page could be loaded.
    """
    url_pattern = r"^https?:/{2}\w.+$"
    chucks = []
    for link in input:
        if not re.match(url_pattern, link):
            print("The given link/str {} cannot be parsed.".format(link))
            continue
        content = load_html_data(link)
        if content is not None:
            chucks.append([content.strip(), link])

    return chucks
|
||||
|
||||
|
||||
def document_transfer(data_collection):
    """Wrap ``(text, source)`` pairs in ``Document`` objects.

    Each document's metadata holds the pair's source under ``"source"`` and a
    freshly generated UUID4 string under ``"identify_id"``.
    """
    return [
        Document(
            page_content=data,
            metadata={"source": meta, "identify_id": str(uuid.uuid4())},
        )
        for data, meta in data_collection
    ]
|
||||
|
||||
|
||||
def create_retriever_from_files(doc, embeddings, index_name: str):
    """Split a file into chunks, index them in Redis and return an MMR retriever.

    Args:
        doc: File handed to ``UnstructuredFileLoader``.
        embeddings: Embedding object passed to ``Redis.from_texts``.
        index_name: Name of the Redis index to write.

    Returns:
        ``as_retriever(search_type="mmr")`` of the created vector store.
    """
    print(f"[rag - create retriever] create with index: {index_name}")
    splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
    pieces = UnstructuredFileLoader(doc, mode="single", strategy="fast").load_and_split(splitter)

    contents = [piece.page_content for piece in pieces]
    metadata_list = [piece.metadata for piece in pieces]
    store = Redis.from_texts(
        texts=contents,
        metadatas=metadata_list,
        embedding=embeddings,
        index_name=index_name,
        redis_url=REDIS_URL,
        index_schema=INDEX_SCHEMA,
    )

    return store.as_retriever(search_type="mmr")
|
||||
|
||||
|
||||
def create_retriever_from_links(embeddings, link_list: list, index_name):
    """Index the text of the given web pages in Redis and return an MMR retriever.

    Args:
        embeddings: Embedding object passed to ``Redis.from_texts``.
        link_list: Links handed to ``parse_html``.
        index_name: Name of the Redis index to write.

    Returns:
        ``as_retriever(search_type="mmr")`` of the created vector store.
    """
    texts, metadatas = [], []
    for data, meta in parse_html(link_list):
        texts.append(data)
        # Every page gets its source link and a fresh UUID4 identifier.
        metadatas.append({"source": meta, "identify_id": str(uuid.uuid4())})

    store = Redis.from_texts(
        texts=texts,
        metadatas=metadatas,
        embedding=embeddings,
        index_name=index_name,
        redis_url=REDIS_URL,
        index_schema=INDEX_SCHEMA,
    )

    return store.as_retriever(search_type="mmr")
|
||||
|
||||
|
||||
def reload_retriever(embeddings, index_name):
    """Open the existing Redis index ``index_name`` and return an MMR retriever for it."""
    print(f"[rag - reload retriever] reload with index: {index_name}")
    store = Redis.from_existing_index(
        embeddings,
        index_name=index_name,
        redis_url=REDIS_URL,
        schema=INDEX_SCHEMA,
    )
    return store.as_retriever(search_type="mmr")
|
||||
|
||||
|
||||
def post_process_text(text: str):
    """Encode one text fragment as a ``data: ...`` event line.

    Args:
        text: Fragment to encode.

    Returns:
        * ``"data: @#$\\n\\n"`` for a single space,
        * ``"data: <br/>\\n\\n"`` for a single newline,
        * ``None`` for any other whitespace-only fragment,
        * otherwise ``"data: <text>\\n\\n"`` with each space replaced by ``@#$``.
    """
    if text == " ":
        return "data: @#$\n\n"
    # The newline case must be tested before isspace(): "\n".isspace() is
    # true, so in the previous order this branch could never be reached.
    if text == "\n":
        return "data: <br/>\n\n"
    if text.isspace():
        return None
    new_text = text.replace(" ", "@#$")
    return f"data: {new_text}\n\n"
|
||||
@@ -1,23 +0,0 @@
|
||||
[tool.poetry]
|
||||
name = "my-app"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Your Name <you@example.com>"]
|
||||
readme = "README.md"
|
||||
packages = [
|
||||
{ include = "app" },
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
uvicorn = "^0.23.2"
|
||||
langserve = {extras = ["server"], version = ">=0.0.30"}
|
||||
pydantic = "<2"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-cli = ">=0.0.15"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
@@ -1,17 +0,0 @@
|
||||
-f https://download.pytorch.org/whl/torch_stable.html
|
||||
cryptography==42.0.4
|
||||
easyocr
|
||||
intel-extension-for-pytorch
|
||||
intel-openmp
|
||||
jupyter
|
||||
langchain==0.1.12
|
||||
langchain-cli
|
||||
langchain_benchmarks
|
||||
poetry
|
||||
pyarrow
|
||||
pydantic==1.10.13
|
||||
pymupdf
|
||||
redis
|
||||
sentence-transformers
|
||||
unstructured
|
||||
unstructured[all-docs]
|
||||
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,86 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import io
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores import Redis
|
||||
from PIL import Image
|
||||
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
|
||||
|
||||
|
||||
def pdf_loader(file_path):
    """Extract the text of a PDF, including text recognised in its images.

    For every page the stripped page text is appended to the result, followed
    by the OCR output of each image on that page. The OCR paragraphs of one
    image are joined with ", " and terminated with "." unless they already
    end in "!", "?" or ".".

    Args:
        file_path: Path of the PDF, opened with PyMuPDF (``fitz``).

    Returns:
        One string with all extracted text concatenated.

    Raises:
        ImportError: When ``easyocr`` or ``fitz`` cannot be imported.
    """
    try:
        import easyocr
        import fitz
    except ImportError:
        raise ImportError(
            "`PyMuPDF` or 'easyocr' package is not found, please install it with "
            "`pip install pymupdf or pip install easyocr.`"
        )

    doc = fitz.open(file_path)
    try:
        reader = easyocr.Reader(["en"])
        result = ""
        for i in range(doc.page_count):
            page = doc.load_page(i)
            pagetext = page.get_text().strip()
            if pagetext:
                result = result + pagetext
            # Query the page's image list once; an empty list skips the loop.
            for img in doc.get_page_images(i):
                if img:
                    xref = img[0]
                    img_data = doc.extract_image(xref)
                    pil_image = Image.open(io.BytesIO(img_data["image"]))
                    img_result = reader.readtext(np.array(pil_image), paragraph=True, detail=0)
                    pageimg = ", ".join(img_result).strip()
                    # Terminate the OCR text so it does not run into what follows.
                    if not pageimg.endswith(("!", "?", ".")):
                        pageimg = pageimg + "."
                    result = result + pageimg
        return result
    finally:
        # Release the PDF handle even when extraction or OCR fails; the
        # document used to be left open.
        doc.close()
|
||||
|
||||
|
||||
def ingest_documents():
    """Load one PDF from ``data/``, chunk it and store the chunks in Redis.

    Only the first entry returned by ``os.listdir("data/")`` is ingested.
    Every chunk is prefixed with the company name before it is embedded.
    """
    company_name = "Nike"
    data_path = "data/"
    # Pick the first directory entry as the document to ingest.
    doc_path = os.path.join(data_path, os.listdir(data_path)[0])

    print("Parsing 10k filing doc for NIKE", doc_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
    chunks = text_splitter.split_text(pdf_loader(doc_path))

    print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf")

    # Build the vector store. Prefixing each chunk with the company name can
    # help semantic retrieval, especially with multiple companies.
    embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
    prefixed_chunks = [f"Company: {company_name}. " + chunk for chunk in chunks]
    _ = Redis.from_texts(
        texts=prefixed_chunks,
        embedding=embedder,
        index_name=INDEX_NAME,
        index_schema=INDEX_SCHEMA,
        redis_url=REDIS_URL,
    )


if __name__ == "__main__":
    ingest_documents()
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.document_loaders import DirectoryLoader, TextLoader, UnstructuredFileLoader
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores import Redis
|
||||
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
|
||||
|
||||
# Everything below runs at import time: it loads, chunks, embeds and stores
# the text files, in that order.

# Recursively load every *.txt file below /ws/txt_files with TextLoader.
loader = DirectoryLoader(
    "/ws/txt_files", glob="**/*.txt", show_progress=True, use_multithreading=True, loader_cls=TextLoader
)

# 1500-character chunks with 100 characters of overlap; add_start_index
# records each chunk's start offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)

chunks = loader.load_and_split(text_splitter)
print("Done preprocessing. Created", len(chunks), "chunks of the original data")

# Create vectorstore
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)

company_name = "Intel"
# The returned store object is not needed; only the write to Redis matters.
_ = Redis.from_texts(
    # appending this little bit can sometimes help with semantic retrieval
    # especially with multiple companies
    texts=[f"Company: {company_name}. " + chunk.page_content for chunk in chunks],
    metadatas=[chunk.metadata for chunk in chunks],
    embedding=embedder,
    index_name=INDEX_NAME,
    index_schema=INDEX_SCHEMA,
    redis_url=REDIS_URL,
)
|
||||
@@ -1,86 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import io
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores import Redis
|
||||
from PIL import Image
|
||||
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
|
||||
|
||||
|
||||
def pdf_loader(file_path):
    """Extract the text of a PDF, including text recognised in its images.

    For every page the stripped page text is appended to the result, followed
    by the OCR output of each image on that page. The OCR paragraphs of one
    image are joined with ", " and terminated with "." unless they already
    end in "!", "?" or ".".

    Args:
        file_path: Path of the PDF, opened with PyMuPDF (``fitz``).

    Returns:
        One string with all extracted text concatenated.

    Raises:
        ImportError: When ``easyocr`` or ``fitz`` cannot be imported.
    """
    try:
        import easyocr
        import fitz
    except ImportError:
        raise ImportError(
            "`PyMuPDF` or 'easyocr' package is not found, please install it with "
            "`pip install pymupdf or pip install easyocr.`"
        )

    doc = fitz.open(file_path)
    try:
        reader = easyocr.Reader(["en"])
        result = ""
        for i in range(doc.page_count):
            page = doc.load_page(i)
            pagetext = page.get_text().strip()
            if pagetext:
                result = result + pagetext
            # Query the page's image list once; an empty list skips the loop.
            for img in doc.get_page_images(i):
                if img:
                    xref = img[0]
                    img_data = doc.extract_image(xref)
                    pil_image = Image.open(io.BytesIO(img_data["image"]))
                    img_result = reader.readtext(np.array(pil_image), paragraph=True, detail=0)
                    pageimg = ", ".join(img_result).strip()
                    # Terminate the OCR text so it does not run into what follows.
                    if not pageimg.endswith(("!", "?", ".")):
                        pageimg = pageimg + "."
                    result = result + pageimg
        return result
    finally:
        # Release the PDF handle even when extraction or OCR fails; the
        # document used to be left open.
        doc.close()
|
||||
|
||||
|
||||
def ingest_documents():
    """Load one PDF from ``data_intel/``, chunk it and store the chunks in Redis.

    Only the first entry returned by ``os.listdir("data_intel/")`` is
    ingested. Every chunk is prefixed with the company name before it is
    embedded.
    """
    company_name = "Intel"
    data_path = "data_intel/"
    # Pick the first directory entry as the document to ingest.
    doc_path = os.path.join(data_path, os.listdir(data_path)[0])

    print("Parsing Intel architecture manuals", doc_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
    chunks = text_splitter.split_text(pdf_loader(doc_path))

    print("Done preprocessing. Created", len(chunks), "chunks of the original pdf")

    # Build the vector store. Prefixing each chunk with the company name can
    # help semantic retrieval, especially with multiple companies.
    embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
    prefixed_chunks = [f"Company: {company_name}. " + chunk for chunk in chunks]
    _ = Redis.from_texts(
        texts=prefixed_chunks,
        embedding=embedder,
        index_name=INDEX_NAME,
        index_schema=INDEX_SCHEMA,
        redis_url=REDIS_URL,
    )


if __name__ == "__main__":
    ingest_documents()
|
||||
@@ -1,88 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "681a5d1e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Connect to RAG App\n",
|
||||
"\n",
|
||||
"Assuming you are already running this server:\n",
|
||||
"```bash\n",
|
||||
"langserve start\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "d774be2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Nike's revenue in 2023 was $51.2 billion. \n",
|
||||
"\n",
|
||||
"Source: 'data/nke-10k-2023.pdf', Start Index: '146100'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langserve.client import RemoteRunnable\n",
|
||||
"\n",
|
||||
"rag_redis = RemoteRunnable(\"http://localhost:8000/rag-redis\")\n",
|
||||
"\n",
|
||||
"print(rag_redis.invoke(\"What was Nike's revenue in 2023?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "07ae0005",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"As of May 31, 2023, Nike had approximately 83,700 employees worldwide. This information can be found in the first piece of context provided. (source: data/nke-10k-2023.pdf, start_index: 32532)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(rag_redis.invoke(\"How many employees work at Nike?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a6b9f00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -1,76 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.llms import HuggingFaceEndpoint
|
||||
from langchain_community.vectorstores import Redis
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
||||
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL, TGI_LLM_ENDPOINT
|
||||
|
||||
|
||||
# Make this look better in the docs.
# Input type of the chain defined in this module (passed to
# ``with_types(input_type=Question)``).
class Question(BaseModel):
    # ``__root__`` makes the model's whole value a single string.
    __root__: str
|
||||
|
||||
|
||||
# Init Embeddings
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)

# Setup semantic cache for LLM
from langchain.cache import RedisSemanticCache
from langchain.globals import set_llm_cache

# Installed globally at import time; the cache shares the embedder and Redis
# URL used for the vector store below.
set_llm_cache(RedisSemanticCache(embedding=embedder, redis_url=REDIS_URL))

# Connect to pre-loaded vectorstore
# run the ingest.py script to populate this
vectorstore = Redis.from_existing_index(
    embedding=embedder, index_name=INDEX_NAME, schema=INDEX_SCHEMA, redis_url=REDIS_URL
)

# TODO allow user to change parameters
retriever = vectorstore.as_retriever(search_type="mmr")

# Define our prompt
# ``{context}`` and ``{question}`` are filled from the dict built by the
# RunnableParallel step of the chain below.
template = """
Use the following pieces of context from retrieved
dataset to answer the question. Do not make up an answer if there is no
context provided to help answer it. Include the 'source' and 'start_index'
from the metadata included in the context you used to answer the question

Context:
---------
{context}

---------
Question: {question}
---------

Answer:
"""

prompt = ChatPromptTemplate.from_template(template)

# RAG Chain
# Text-generation endpoint at TGI_LLM_ENDPOINT, configured for streaming.
model = HuggingFaceEndpoint(
    endpoint_url=TGI_LLM_ENDPOINT,
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
    streaming=True,
    truncate=1024,
)

# Pipeline: the input goes to the retriever (as "context") and is passed
# through unchanged (as "question"); the dict feeds the prompt, then the
# model, and the model output is parsed to a plain string.
chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) | prompt | model | StrOutputParser()
).with_types(input_type=Question)
|
||||
@@ -1,88 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def get_boolean_env_var(var_name, default_value=False):
    """Interpret an environment variable as a boolean.

    Args:
        var_name (str): Name of the environment variable to read.
        default_value (bool): Value returned when the variable is unset or
            holds a string that is neither a recognised true nor false word.

    Returns:
        bool: ``True`` for "true", "1", "t", "y", "yes"; ``False`` for
        "false", "0", "f", "n", "no" (both case-insensitive); otherwise
        ``default_value``.
    """
    raw = os.getenv(var_name, "").lower()
    if raw in {"true", "1", "t", "y", "yes"}:
        return True
    if raw in {"false", "0", "f", "n", "no"}:
        return False
    return default_value
|
||||
|
||||
|
||||
# Check for openai API key
|
||||
# if "OPENAI_API_KEY" not in os.environ:
|
||||
# raise Exception("Must provide an OPENAI_API_KEY as an env var.")
|
||||
|
||||
|
||||
# Whether or not to enable langchain debugging
DEBUG = get_boolean_env_var("DEBUG", False)
# Set DEBUG env var to "true" if you wish to enable LC debugging module
if DEBUG:
    # Imported lazily so langchain is only loaded here when debugging is on.
    import langchain

    langchain.debug = True


# Embedding model
# Overridable through the EMBED_MODEL environment variable.
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# Redis Connection Information
# Host and port are only used to build REDIS_URL when no REDIS_URL env var is set.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
|
||||
|
||||
|
||||
def format_redis_conn_from_env():
    """Build the Redis connection URL from environment variables.

    A non-empty ``REDIS_URL`` is returned unchanged. Otherwise the URL is
    assembled from ``REDIS_SSL`` (selects ``rediss://`` over ``redis://``),
    the optional ``REDIS_USERNAME`` (default "default") and
    ``REDIS_PASSWORD`` pair, and the module-level ``REDIS_HOST`` and
    ``REDIS_PORT``. Credentials are only included when a password is set.
    """
    explicit_url = os.getenv("REDIS_URL", None)
    if explicit_url:
        return explicit_url

    scheme = "rediss://" if get_boolean_env_var("REDIS_SSL", False) else "redis://"

    # if using RBAC
    password = os.getenv("REDIS_PASSWORD", None)
    username = os.getenv("REDIS_USERNAME", "default")
    credentials = "" if password is None else f"{username}:{password}@"

    return scheme + credentials + f"{REDIS_HOST}:{REDIS_PORT}"
|
||||
|
||||
|
||||
# Resolved once at import time from the environment.
REDIS_URL = format_redis_conn_from_env()

# Vector Index Configuration
INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis")


# The index schema file is looked up next to this module; REDIS_SCHEMA
# selects which file in that directory is used.
current_file_path = os.path.abspath(__file__)
parent_dir = os.path.dirname(current_file_path)
REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "schema.yml")
schema_path = os.path.join(parent_dir, REDIS_SCHEMA)
INDEX_SCHEMA = schema_path
# Text-generation endpoints, overridable through the environment.
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081")
|
||||
@@ -1,15 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Index schema: text and numeric metadata fields plus one vector field.
text:
  - name: content
  - name: source
numeric:
  - name: start_index
vector:
  - name: content_vector
    algorithm: HNSW
    datatype: FLOAT32
    # NOTE(review): presumably has to equal the output size of the embedding
    # model in use - confirm.
    dims: 384
    distance_metric: COSINE
|
||||
@@ -1,15 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Index schema: text and numeric metadata fields plus one vector field.
text:
  - name: content
  - name: source
numeric:
  - name: start_index
vector:
  - name: content_vector
    algorithm: HNSW
    datatype: FLOAT32
    # NOTE(review): presumably has to equal the output size of the embedding
    # model in use - confirm.
    dims: 1024
    distance_metric: COSINE
|
||||
@@ -1,15 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Index schema: text and numeric metadata fields plus one vector field.
text:
  - name: content
  - name: source
numeric:
  - name: start_index
vector:
  - name: content_vector
    algorithm: HNSW
    datatype: FLOAT32
    # NOTE(review): presumably has to equal the output size of the embedding
    # model in use - confirm.
    dims: 768
    distance_metric: COSINE
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Index schema with text-only metadata fields plus one vector field.
# NOTE(review): changefreq / loc / priority look like sitemap attributes -
# confirm against the loader that produces this metadata.
text:
  - name: content
  - name: changefreq
  - name: description
  - name: language
  - name: loc
  - name: priority
  - name: source
  - name: title
vector:
  - name: content_vector
    algorithm: HNSW
    datatype: FLOAT32
    # NOTE(review): presumably has to equal the output size of the embedding
    # model in use - confirm.
    dims: 768
    distance_metric: COSINE
|
||||
@@ -1,89 +0,0 @@
|
||||
[TGI-Gaudi](https://github.com/huggingface/tgi-gaudi) provides many parameters aimed at optimizing performance for text generation inference tasks. By optimizing these parameters, users can achieve the best results in terms of inference speed, memory usage, and overall efficiency. These parameters cover various aspects such as maximum sequence length, batch size, Gaudi processor utilization, and environment configurations. By carefully adjusting these parameters according to the specific requirements of the workload and hardware environment, users can unlock the full potential of TGI-Gaudi for the text generation tasks.
|
||||
|
||||
# Knowledge about TGI-Gaudi performance tuning
|
||||
|
||||
## Adjusting TGI parameters
|
||||
|
||||
Maximum sequence length is controlled by two arguments:
|
||||
|
||||
- `--max-input-length` is the maximum possible input prompt length. Default value is `1024`.
|
||||
- `--max-total-tokens` is the maximum possible total length of the sequence (input and output). Default value is `2048`.
|
||||
|
||||
Maximum batch size is controlled by two arguments:
|
||||
|
||||
- For prefill operation, please set `--max-prefill-total-tokens` as `bs * max-input-length`, where `bs` is your expected maximum prefill batch size.
|
||||
- For decode operation, please set `--max-batch-total-tokens` as `bs * max-total-tokens`, where `bs` is your expected maximum decode batch size.
|
||||
- Please note that the batch size will always be padded to the nearest multiple of `BATCH_BUCKET_SIZE` and `PREFILL_BATCH_BUCKET_SIZE`.
|
||||
|
||||
To ensure greatest performance results, at the beginning of each server run, warmup is performed. It's designed to cover major recompilations while using HPU Graphs. It creates queries with all possible input shapes, based on provided parameters (described in this section) and runs basic TGI operations on them (prefill, decode, concatenate).
|
||||
|
||||
Except those already mentioned, there are other parameters that need to be properly adjusted to improve performance or memory usage:
|
||||
|
||||
- `PAD_SEQUENCE_TO_MULTIPLE_OF` determines sizes of input length buckets. Since warmup creates several graphs for each bucket, it's important to adjust that value proportionally to input sequence length. Otherwise, some out of memory issues can be observed.
|
||||
- `ENABLE_HPU_GRAPH` enables HPU graphs usage, which is crucial for performance results. Recommended value to keep is `true` .
|
||||
|
||||
For more information and documentation about Text Generation Inference, check out [the README](https://github.com/huggingface/text-generation-inference#text-generation-inference) of the original repo.
|
||||
|
||||
## Environment Variable HABANA_VISIBLE_MODULES
|
||||
|
||||
To run a workload with part of the available Gaudi processors, you need to set the module IDs of the used Gaudi processors in the environment, HABANA_VISIBLE_MODULES. In general, there are eight Gaudi processors on a node, so the module IDs would be in the range of 0 ~ 7. If you want to run a 4-Gaudi workload, you can set the below before you run the workload:
|
||||
|
||||
```bash
|
||||
export HABANA_VISIBLE_MODULES="0,1,2,3"
|
||||
```
|
||||
|
||||
If you want to run another 4-Gaudi workload in parallel, you can set the below before running the second workload to let it use the rest of the available four Gaudi processors.
|
||||
|
||||
```bash
|
||||
export HABANA_VISIBLE_MODULES="4,5,6,7"
|
||||
```
|
||||
|
||||
Though using partial Gaudi in a workload is possible, only 2-Gaudi and 4-Gaudi scenarios are supported. It is highly recommended to set HABANA_VISIBLE_MODULES using the combinations listed below:
|
||||
|
||||
- 2-Gaudi - “0,1”, “2,3”, “4,5” or “6,7”
|
||||
- 4-Gaudi - “0,1,2,3” or “4,5,6,7”
|
||||
|
||||
For the details please check [Multiple_Workloads_Single_Docker](https://docs.habana.ai/en/latest/PyTorch/Reference/PT_Multiple_Tenants_on_HPU/Multiple_Workloads_Single_Docker.html)
|
||||
|
||||
## Environment Variable HABANA_VISIBLE_DEVICES
|
||||
|
||||
There are some guidelines on setting HABANA_VISIBLE_DEVICES; however, before reading them you need to know how to find the mapping between the index and the module ID of the Gaudi processors. The command below prints that mapping, and the table after it shows a sample output:
|
||||
|
||||
```bash
|
||||
hl-smi -Q index,module_id -f csv
|
||||
```
|
||||
|
||||
| index | module_id |
|
||||
| :---: | :-------: |
|
||||
| 3 | 6 |
|
||||
| 1 | 4 |
|
||||
| 2 | 7 |
|
||||
| 0 | 5 |
|
||||
| 4 | 2 |
|
||||
| 6 | 0 |
|
||||
| 7 | 3 |
|
||||
| 5 | 1 |
|
||||
|
||||
With the mapping between index and module ID, you can set `HABANA_VISIBLE_DEVICES` properly with the guidelines below:
|
||||
|
||||
- Mount two Gaudi Processors or four Gaudi Processors in the docker container. Even though using partial Gaudi in a distributed workload is possible, only 2-Gaudi and 4-Gaudi scenarios are allowed.
|
||||
- Since `HABANA_VISIBLE_DEVICES` accepts index instead of module ID, you need to leverage the above command to figure out the corresponding indices for a set of module IDs.
|
||||
- Avoid mounting the same index on multiple containers. Since multiple workloads might run in parallel, avoiding mounting the same Gaudi to multiple docker containers can prevent reusing the same Gaudi in different workloads.
|
||||
|
||||
For the details please check [Multiple Dockers Each with a Single Workload](https://docs.habana.ai/en/latest/PyTorch/Reference/PT_Multiple_Tenants_on_HPU/Multiple_Dockers_each_with_Single_Workload.html)
|
||||
|
||||
For the System Management Interface Tool please check [hl-smi](https://docs.habana.ai/en/latest/Management_and_Monitoring/Embedded_System_Tools_Guide/System_Management_Interface_Tool.html)
|
||||
|
||||
# Verified Docker commands with tuned parameters for best performance
|
||||
|
||||
## Docker command for 70B model
|
||||
|
||||
```bash
|
||||
docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
|
||||
```
|
||||
|
||||
## Docker command for 13B model
|
||||
|
||||
```bash
|
||||
docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
|
||||
```
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Clone the tgi-gaudi sources and build the TGI Gaudi serving image locally.
# Proxy settings are forwarded from the caller's http_proxy / https_proxy.

# Stop at the first failure so that a failed clone or cd never leads to
# `docker build` running against the caller's current directory.
set -e

git clone https://github.com/huggingface/tgi-gaudi.git
cd ./tgi-gaudi/
docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy="$https_proxy" --build-arg http_proxy="$http_proxy"
|
||||
@@ -1,41 +0,0 @@
|
||||
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Launch the TGI Gaudi serving container.
# Usage: <script> [num_cards] [port_number] [model_name]
#   num_cards    number of Gaudi cards to shard the model over (1-8, default 1)
#   port_number  host port mapped to the container's port 80 (default 8080)
#   model_name   Hugging Face model id to serve

# Set default values
default_port=8080
default_model="Intel/neural-chat-7b-v3-3"
default_num_cards=1

# All arguments are optional, so only a surplus of arguments is an error.
if [ "$#" -gt 3 ]; then
    echo "Usage: $0 [num_cards] [port_number] [model_name]"
    exit 1
fi

# Assign arguments to variables
num_cards=${1:-$default_num_cards}
port_number=${2:-$default_port}
model_name=${3:-$default_model}

# Check that num_cards is an integer within the valid range (1-8).
# The regex guard matters: a non-numeric value makes both `[ ... -lt ... ]`
# tests error out (status 2), which would otherwise let it slip through.
if ! [[ "$num_cards" =~ ^[0-9]+$ ]] || [ "$num_cards" -lt 1 ] || [ "$num_cards" -gt 8 ]; then
    echo "Error: num_cards must be between 1 and 8."
    exit 1
fi

# Set the volume variable
volume=$PWD/data

# Build the Docker run command as an array so arguments keep their word
# boundaries and no eval is needed.
docker_cmd=(docker run -d --name="ChatQnA_server" -p "$port_number:80" -v "$volume:/data" --runtime=habana)

# Multi-card runs need lazy collectives enabled.
if [ "$num_cards" -gt 1 ]; then
    docker_cmd+=(-e PT_HPU_ENABLE_LAZY_COLLECTIVES=true)
fi

# HTTP_PROXY is taken from $http_proxy (it was previously set from $https_proxy).
docker_cmd+=(-e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY="$https_proxy" -e HTTP_PROXY="$http_proxy" ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id "$model_name")

# Shard the model only when more than one card is requested.
if [ "$num_cards" -gt 1 ]; then
    docker_cmd+=(--sharded true --num-shard "$num_cards")
fi

# Execute the Docker run command
echo "${docker_cmd[@]}"
"${docker_cmd[@]}"
|
||||
@@ -1,63 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
|
||||
test_env_setup() {
    # Derive all paths from the parent of the launch directory, then enter
    # the audio docker build context.
    local parent_dir
    parent_dir=$(dirname "$PWD")
    ASR_CONTAINER_NAME="test-audioqna-asr"
    LOG_PATH=$parent_dir/tests/asr.log
    WORKPATH=$parent_dir/audio/docker
    cd $WORKPATH
}
|
||||
|
||||
function start_asr_service() {
    # Build the ASR image from Dockerfile_asr and start it detached, mapping
    # host port 8018 to container port 8008.
    cd $WORKPATH
    # https_proxy is passed from ${https_proxy} (it was previously fed ${http_proxy}).
    docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} -f Dockerfile_asr -t intel/gen-ai-examples:$ASR_CONTAINER_NAME
    docker run -d --name=$ASR_CONTAINER_NAME -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8018:8008 intel/gen-ai-examples:$ASR_CONTAINER_NAME
    # Fixed wait for the service to come up before the tests run.
    sleep 1m
}
|
||||
|
||||
run_tests() {
    # Download a sample recording, post it to the transcription endpoint and
    # store the raw response in $LOG_PATH for check_response.
    local sample_url=https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav

    cd $WORKPATH
    rm -f sample.wav
    wget $sample_url
    # http_proxy is blanked for this call only, so curl reaches localhost directly.
    http_proxy= curl -F 'file=@sample.wav' http://localhost:8018/v1/audio/transcriptions > $LOG_PATH
    rm -f sample.wav
}
|
||||
|
||||
check_response() {
    # Pass when the log file exists and contains the expected transcription;
    # otherwise exit the script with status 1.
    cd $WORKPATH
    echo "Checking response"

    local matched=false
    if [[ -f $LOG_PATH ]]; then
        if [[ $(grep -c "who is pat gelsinger" $LOG_PATH) != 0 ]]; then
            matched=true
        fi
    fi

    if [ $matched == true ]; then
        echo "Response check succeed"
    else
        echo "Response check failed"
        exit 1
    fi
}
|
||||
|
||||
function docker_stop() {
    # Stop and remove every container whose name matches $1.
    #   $1  container name (matched by docker's name filter)
    local container_name=$1
    local cid

    # An empty name filter matches every container on the host, so refuse to
    # act rather than stop and remove unrelated containers.
    if [[ -z "$container_name" ]]; then
        echo "docker_stop: no container name given, nothing to stop" >&2
        return 0
    fi

    cid=$(docker ps -aq --filter "name=$container_name")
    # $cid is left unquoted on purpose: it may hold several ids.
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
}
|
||||
|
||||
|
||||
|
||||
main() {
    # Test flow: set up paths, clear leftovers, start the ASR service, run
    # the request, tear down, then validate the captured response.
    test_env_setup

    if docker_stop $ASR_CONTAINER_NAME; then
        sleep 5s
    fi

    start_asr_service
    run_tests

    if docker_stop $ASR_CONTAINER_NAME; then
        sleep 5s
    fi
    echo y | docker system prune

    check_response
}
|
||||
|
||||
main
|
||||
@@ -1,110 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
|
||||
test_env_setup() {
    # Define the container names used by this test and move to the parent of
    # the launch directory, which serves as the working root.
    AUDIOQNA_CONTAINER_NAME="test-AudioQnA_server"
    LANGCHAIN_CONTAINER_NAME="test-qna-rag-redis-server"
    REDIS_CONTAINER_NAME="test-redis-vector-db"

    WORKPATH=$(dirname "$PWD")
    LOG_PATH="$WORKPATH/tests/langchain.log"
    cd $WORKPATH
}
|
||||
|
||||
rename() {
    # Rename the docker container/image names to avoid conflict with local test
    local compose_file=langchain/docker/docker-compose.yml

    cd ${WORKPATH}
    sed -i "s/container_name: redis-vector-db/container_name: ${REDIS_CONTAINER_NAME}/g" $compose_file
    sed -i "s/container_name: qna-rag-redis-server/container_name: ${LANGCHAIN_CONTAINER_NAME}/g" $compose_file
    sed -i "s/image: intel\/gen-ai-examples:qna-rag-redis-server/image: intel\/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}/g" $compose_file
    # The launch script hard-codes its container name; patch it in place too.
    sed -i "s/ChatQnA_server/${AUDIOQNA_CONTAINER_NAME}/g" serving/tgi_gaudi/launch_tgi_service.sh
}
|
||||
|
||||
launch_tgi_gaudi_service() {
    # Point the langchain sources at the test TGI port, pull the TGI Gaudi
    # image and start the serving container through the launch script.
    local card_num=1
    local port=8888
    local model_name="Intel/neural-chat-7b-v3-3"
    local patched_file

    cd ${WORKPATH}

    # Reset the tgi port
    for patched_file in \
        langchain/redis/rag_redis/config.py \
        langchain/docker/qna-app/app/server.py \
        langchain/docker/qna-app/Dockerfile; do
        sed -i "s/8080/$port/g" $patched_file
    done

    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
    bash serving/tgi_gaudi/launch_tgi_service.sh $card_num $port $model_name
    sleep 3m # Waits 3 minutes
}
|
||||
|
||||
launch_redis_and_langchain_service() {
    # Bring up the compose stack (rebuilding images) with the backend moved
    # to the test port, then load the sample data.
    local port=8890

    cd $WORKPATH
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
    docker compose -f langchain/docker/docker-compose.yml up -d --build

    # Ingest data into redis
    docker exec $LANGCHAIN_CONTAINER_NAME bash -c "cd /ws && python ingest.py > /dev/null"
}
|
||||
|
||||
start_backend_service() {
    # Start the QnA backend in the background inside the langchain container
    # and give it a fixed minute to come up.
    cd $WORKPATH
    docker exec $LANGCHAIN_CONTAINER_NAME bash -c "nohup python app/server.py &"
    sleep 1m
}
|
||||
|
||||
run_tests() {
    # Send one RAG chat query to the backend and capture the raw response in
    # $LOG_PATH for check_response.
    local port=8890
    local payload='{"query":"What is the total revenue of Nike in 2023?"}'

    cd $WORKPATH
    curl 127.0.0.1:$port/v1/rag/chat -X POST -d "$payload" -H 'Content-Type: application/json' > $LOG_PATH
}
|
||||
|
||||
check_response() {
    # Pass when the log file exists and mentions the expected revenue figure;
    # otherwise exit the script with status 1.
    cd $WORKPATH
    echo "Checking response"

    local matched=false
    if [[ -f $LOG_PATH ]]; then
        if [[ $(grep -c "\$51.2 billion" $LOG_PATH) != 0 ]]; then
            matched=true
        fi
    fi

    if [ $matched == true ]; then
        echo "Response check succeed"
    else
        echo "Response check failed"
        exit 1
    fi
}
|
||||
|
||||
function docker_stop() {
    # Stop and remove every container whose name matches $1.
    #   $1  container name (matched by docker's name filter)
    local container_name=$1
    local cid

    # An empty name filter matches every container on the host, so refuse to
    # act rather than stop and remove unrelated containers.
    if [[ -z "$container_name" ]]; then
        echo "docker_stop: no container name given, nothing to stop" >&2
        return 0
    fi

    cid=$(docker ps -aq --filter "name=$container_name")
    # $cid is left unquoted on purpose: it may hold several ids.
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
}
|
||||
|
||||
function main() {
    # End-to-end flow: prepare names/paths, clear leftovers, start TGI plus
    # the redis/langchain stack and backend, send one query, tear everything
    # down, then validate the captured response.
    test_env_setup
    rename
    # Uses AUDIOQNA_CONTAINER_NAME, the name test_env_setup defines. The
    # previously referenced CHATQNA_CONTAINER_NAME is never set, which handed
    # docker_stop an empty name filter.
    docker_stop $AUDIOQNA_CONTAINER_NAME && docker_stop $LANGCHAIN_CONTAINER_NAME && docker_stop $REDIS_CONTAINER_NAME && sleep 5s

    launch_tgi_gaudi_service
    launch_redis_and_langchain_service
    start_backend_service

    run_tests

    docker_stop $AUDIOQNA_CONTAINER_NAME && docker_stop $LANGCHAIN_CONTAINER_NAME && docker_stop $REDIS_CONTAINER_NAME && sleep 5s
    echo y | docker system prune

    check_response
}
|
||||
|
||||
main
|
||||
@@ -1,84 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
|
||||
test_env_setup() {
    # Derive all paths from the parent of the launch directory, then enter
    # the audio docker build context.
    local parent_dir
    parent_dir=$(dirname "$PWD")
    TTS_CONTAINER_NAME="test-audioqna-tts"
    OUTPUT_PATH=$parent_dir/tests/output.wav
    WORKPATH=$parent_dir/audio/docker
    cd $WORKPATH
}
|
||||
|
||||
function start_tts_service() {
    # Fetch the pretrained GPT-SoVITS models, build the TTS image from
    # Dockerfile_tts and start it detached, mapping host port 9888 to
    # container port 9880.
    cd $WORKPATH
    rm -rf pretrained_tts_models
    git clone https://huggingface.co/lj1995/GPT-SoVITS pretrained_tts_models
    # https_proxy is passed from ${https_proxy} (it was previously fed ${http_proxy}).
    docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} -f Dockerfile_tts -t intel/gen-ai-examples:$TTS_CONTAINER_NAME
    # The bind mount uses an absolute host path; relative paths in -v are not
    # accepted by every docker version.
    docker run -d --name=$TTS_CONTAINER_NAME -v $PWD/pretrained_tts_models:/GPT-SoVITS/GPT_SoVITS/pretrained_models -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} -p 9888:9880 intel/gen-ai-examples:$TTS_CONTAINER_NAME --bf16
    # Fixed wait for the service to come up before the tests run.
    sleep 1m
}
|
||||
|
||||
run_tests() {
    # Register a reference voice with the TTS service, then request a
    # text-to-speech conversion and save the audio to ${OUTPUT_PATH}.
    local sample_url=https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav

    cd $WORKPATH
    rm -f ${OUTPUT_PATH}
    rm -f sample.wav

    # Upload reference audio as default voice
    wget $sample_url
    curl --location 'localhost:9888/upload_as_default' --form 'default_refer_file=@"sample.wav"' --form 'default_refer_text="Who is Pat Gelsinger?"' --form 'default_refer_language="en"'

    # Do text to speech conversion
    # The JSON body is kept flush-left so the payload bytes stay unchanged.
    curl --location 'localhost:9888/v1/audio/speech' --header 'Content-Type: application/json' --output ${OUTPUT_PATH} --data '{
"text": "You can have a look, but you should not touch this item.",
"text_language": "en"
}'
    rm -f sample.wav
}
|
||||
|
||||
check_response() {
    # Pass when the synthesized audio file exists; otherwise exit the script
    # with status 1. On success the output file is removed afterwards.
    cd $WORKPATH
    echo "Checking response"

    local produced=false
    if [[ -f $OUTPUT_PATH ]]; then
        produced=true
    fi

    if [ $produced == true ]; then
        echo "Response check succeed"
    else
        echo "Response check failed"
        exit 1
    fi

    # clear resources
    rm -f ${OUTPUT_PATH}
}
|
||||
|
||||
function docker_stop() {
    # Stop and remove every container whose name matches $1.
    #   $1  container name (matched by docker's name filter)
    local container_name=$1
    local cid

    # An empty name filter matches every container on the host, so refuse to
    # act rather than stop and remove unrelated containers.
    if [[ -z "$container_name" ]]; then
        echo "docker_stop: no container name given, nothing to stop" >&2
        return 0
    fi

    cid=$(docker ps -aq --filter "name=$container_name")
    # $cid is left unquoted on purpose: it may hold several ids.
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
}
|
||||
|
||||
main() {
    # Test flow: set up paths, clear leftovers, start the TTS service, run
    # the requests, validate the output, then tear down.
    test_env_setup

    if docker_stop $TTS_CONTAINER_NAME; then
        sleep 5s
    fi

    start_tts_service
    run_tests
    check_response

    if docker_stop $TTS_CONTAINER_NAME; then
        sleep 5s
    fi
    echo y | docker system prune
}
|
||||
|
||||
main
|
||||
@@ -15,11 +15,12 @@ RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
RUN cd /home/user/ && \
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
RUN cd /home/user/GenAIComps && pip install --no-cache-dir --upgrade pip && \
|
||||
pip install -r /home/user/GenAIComps/requirements.txt
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./audioqna.py /home/user/audioqna.py
|
||||
|
||||
|
||||
54
AudioQnA/docker/docker_build_compose.yaml
Normal file
54
AudioQnA/docker/docker_build_compose.yaml
Normal file
@@ -0,0 +1,54 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Build definitions for every image used by the AudioQnA example.
# `audioqna` carries the shared proxy build args; the other services extend
# it and only override the build context and Dockerfile.
services:
  audioqna:
    build:
      args:
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
      dockerfile: ./Dockerfile
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
  whisper-gaudi:
    build:
      context: GenAIComps
      dockerfile: comps/asr/whisper/Dockerfile_hpu
    extends: audioqna
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
  whisper:
    build:
      context: GenAIComps
      dockerfile: comps/asr/whisper/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
  asr:
    build:
      context: GenAIComps
      dockerfile: comps/asr/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
  llm-tgi:
    build:
      context: GenAIComps
      dockerfile: comps/llms/text-generation/tgi/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
  speecht5-gaudi:
    build:
      context: GenAIComps
      dockerfile: comps/tts/speecht5/Dockerfile_hpu
    extends: audioqna
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
  speecht5:
    build:
      context: GenAIComps
      dockerfile: comps/tts/speecht5/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
  tts:
    build:
      context: GenAIComps
      dockerfile: comps/tts/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AudioQnA/docker/gaudi/
|
||||
docker compose up -d
|
||||
TAG=v0.9 docker compose up -d
|
||||
```
|
||||
|
||||
## 🚀 Test MicroServices
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
whisper-service:
|
||||
image: opea/whisper-gaudi:latest
|
||||
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
@@ -22,7 +19,7 @@ services:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: opea/asr:latest
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
@@ -30,7 +27,7 @@ services:
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: opea/speecht5-gaudi:latest
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
ports:
|
||||
- "7055:7055"
|
||||
@@ -46,7 +43,7 @@ services:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: opea/tts:latest
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
@@ -75,7 +72,7 @@ services:
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
llm:
|
||||
image: opea/llm-tgi:latest
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
@@ -90,7 +87,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
audioqna-gaudi-backend-server:
|
||||
image: opea/audioqna:latest
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
|
||||
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AudioQnA/docker/xeon/
|
||||
docker compose up -d
|
||||
TAG=v0.9 docker compose up -d
|
||||
```
|
||||
|
||||
## 🚀 Test MicroServices
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
whisper-service:
|
||||
image: opea/whisper:latest
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
@@ -17,7 +14,7 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: opea/asr:latest
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
@@ -25,7 +22,7 @@ services:
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: opea/speecht5:latest
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
ports:
|
||||
- "7055:7055"
|
||||
@@ -36,7 +33,7 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: opea/tts:latest
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
@@ -44,7 +41,7 @@ services:
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:1.4
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "3006:80"
|
||||
@@ -56,9 +53,9 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${LLM_MODEL_ID}
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: opea/llm-tgi:latest
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
@@ -73,7 +70,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
audioqna-xeon-backend-server:
|
||||
image: opea/audioqna:latest
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-xeon-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
|
||||
74
AudioQnA/kubernetes/README.md
Normal file
74
AudioQnA/kubernetes/README.md
Normal file
@@ -0,0 +1,74 @@
|
||||
# Deploy AudioQnA in Kubernetes Cluster on Xeon and Gaudi
|
||||
|
||||
This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon server and Gaudi machines.
|
||||
|
||||
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
|
||||
|
||||
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying installation.
|
||||
|
||||
|
||||
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks whether the microservices listed in the CR yaml file are running; if not, it starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. If you run `kubectl get pods`, you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
|
||||
|
||||
|
||||
## Using prebuilt images
|
||||
|
||||
AudioQnA uses the following prebuilt images if you choose a Xeon deployment:
|
||||
|
||||
- tgi-service: ghcr.io/huggingface/text-generation-inference:1.4
|
||||
- llm: opea/llm-tgi:v0.9
|
||||
- asr: opea/asr:v0.9
|
||||
- whisper: opea/whisper:v0.9
|
||||
- tts: opea/tts:v0.9
|
||||
- speecht5: opea/speecht5:v0.9
|
||||
|
||||
|
||||
Should you desire to use the Gaudi accelerator, alternate images are used for the TGI, Whisper, and SpeechT5 services.
|
||||
For Gaudi:
|
||||
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
- whisper-gaudi: opea/whisper-gaudi:v0.9
|
||||
- speecht5-gaudi: opea/speecht5-gaudi:v0.9
|
||||
|
||||
> [!NOTE]
|
||||
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
|
||||
|
||||
## Deploy AudioQnA pipeline
|
||||
This involves deploying the AudioQnA custom resource. You can use audioQnA_xeon.yaml or if you have a Gaudi cluster, you could use audioQnA_gaudi.yaml.
|
||||
|
||||
1. Create namespace and deploy application
|
||||
```sh
|
||||
kubectl create ns audioqa
|
||||
kubectl apply -f $(pwd)/audioQnA_xeon.yaml
|
||||
```
|
||||
|
||||
2. GMC will reconcile the AudioQnA custom resource and get all related components/services ready. Check whether the services are up.
|
||||
|
||||
```sh
|
||||
kubectl get service -n audioqa
|
||||
```
|
||||
|
||||
3. Retrieve the application access URL
|
||||
|
||||
```sh
|
||||
kubectl get gmconnectors.gmc.opea.io -n audioqa
|
||||
NAME URL READY AGE
|
||||
audioqa http://router-service.audioqa.svc.cluster.local:8080 6/0/6 5m
|
||||
```
|
||||
|
||||
4. Deploy a client pod to test the application
|
||||
|
||||
```sh
|
||||
kubectl create deployment client-test -n audioqa --image=python:3.8.13 -- sleep infinity
|
||||
```
|
||||
|
||||
5. Access the application using the above URL from the client pod
|
||||
|
||||
```sh
|
||||
export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
|
||||
export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
|
||||
kubectl exec "$CLIENT_POD" -n audioqa -- curl $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
> [!NOTE]
> You can remove your AudioQnA pipeline by executing standard Kubernetes kubectl commands to remove a custom resource. Verify it was removed by executing `kubectl get pods` in the audioqa namespace.
|
||||
58
AudioQnA/kubernetes/audioQnA_gaudi.yaml
Normal file
58
AudioQnA/kubernetes/audioQnA_gaudi.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# GMConnector custom resource describing the AudioQnA pipeline on Gaudi.
# The root node runs its steps in sequence; steps flagged
# isDownstreamService are backends referenced by the preceding step's config.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: audioqa
  namespace: audioqa
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Asr
          internalService:
            serviceName: asr-svc
            config:
              endpoint: /v1/audio/transcriptions
              ASR_ENDPOINT: whisper-gaudi-svc
        - name: WhisperGaudi
          internalService:
            serviceName: whisper-gaudi-svc
            config:
              endpoint: /v1/asr
            isDownstreamService: true
        - name: Llm
          data: $response
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc
            config:
              endpoint: /generate
            isDownstreamService: true
        - name: Tts
          data: $response
          internalService:
            serviceName: tts-svc
            config:
              endpoint: /v1/audio/speech
              TTS_ENDPOINT: speecht5-gaudi-svc
        - name: SpeechT5Gaudi
          internalService:
            serviceName: speecht5-gaudi-svc
            config:
              endpoint: /v1/tts
            isDownstreamService: true
|
||||
58
AudioQnA/kubernetes/audioQnA_xeon.yaml
Normal file
58
AudioQnA/kubernetes/audioQnA_xeon.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# GMConnector custom resource describing the AudioQnA pipeline on Xeon.
# The root node runs its steps in sequence; steps flagged
# isDownstreamService are backends referenced by the preceding step's config.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: audioqa
  namespace: audioqa
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Asr
          internalService:
            serviceName: asr-svc
            config:
              endpoint: /v1/audio/transcriptions
              ASR_ENDPOINT: whisper-svc
        - name: Whisper
          internalService:
            serviceName: whisper-svc
            config:
              endpoint: /v1/asr
            isDownstreamService: true
        - name: Llm
          data: $response
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-svc
        - name: Tgi
          internalService:
            serviceName: tgi-svc
            config:
              endpoint: /generate
            isDownstreamService: true
        - name: Tts
          data: $response
          internalService:
            serviceName: tts-svc
            config:
              endpoint: /v1/audio/speech
              TTS_ENDPOINT: speecht5-svc
        - name: SpeechT5
          internalService:
            serviceName: speecht5-svc
            config:
              endpoint: /v1/tts
            isDownstreamService: true
|
||||
32
AudioQnA/kubernetes/manifests/README.md
Normal file
32
AudioQnA/kubernetes/manifests/README.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# Deploy AudioQnA in a Kubernetes Cluster
|
||||
|
||||
> [!NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
> You can also customize the "MODEL_ID" and "model-volume"
|
||||
|
||||
## Deploy On Xeon
|
||||
```
|
||||
cd GenAIExamples/AudioQnA/kubernetes/manifests/xeon
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
|
||||
kubectl apply -f audioqna.yaml
|
||||
```
|
||||
## Deploy On Gaudi
|
||||
```
|
||||
cd GenAIExamples/AudioQnA/kubernetes/manifests/gaudi
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
|
||||
kubectl apply -f audioqna.yaml
|
||||
```
|
||||
|
||||
|
||||
## Verify Services
|
||||
|
||||
Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary.
|
||||
|
||||
```bash
|
||||
kubectl get pods
|
||||
|
||||
curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json'
|
||||
```
|
||||
439
AudioQnA/kubernetes/manifests/gaudi/audioqna.yaml
Normal file
439
AudioQnA/kubernetes/manifests/gaudi/audioqna.yaml
Normal file
@@ -0,0 +1,439 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Shared environment for every AudioQnA workload in this manifest; each
# Deployment loads it through envFrom.
apiVersion: v1
kind: ConfigMap
metadata:
  name: audio-qna-config
  namespace: default
data:
  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # Placeholder; replace with a real token before applying the manifest.
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
  ASR_SERVICE_HOST_IP: asr-svc
  ASR_SERVICE_PORT: "3001"
  LLM_SERVICE_HOST_IP: llm-svc
  LLM_SERVICE_PORT: "3007"
  TTS_SERVICE_HOST_IP: tts-svc
  TTS_SERVICE_PORT: "3002"
|
||||
|
||||
---
|
||||
# ASR microservice: Deployment plus the ClusterIP Service exposing it.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: asr-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: asr-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: asr-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: asr-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: audio-qna-config
          image: opea/asr:v0.9
          imagePullPolicy: IfNotPresent
          name: asr-deploy
          args: null
          ports:
            - containerPort: 9099
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: asr-svc
  # Pinned to the same namespace as the Deployment it selects.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: asr-deploy
  ports:
    - name: service
      port: 3001
      targetPort: 9099
|
||||
---
|
||||
|
||||
# Whisper (Gaudi) speech-recognition backend: Deployment requesting one
# Gaudi device, plus the ClusterIP Service exposing it.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: whisper-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: whisper-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: whisper-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: audio-qna-config
          image: opea/whisper-gaudi:v0.9
          imagePullPolicy: IfNotPresent
          name: whisper-deploy
          args: null
          ports:
            - containerPort: 7066
          resources:
            limits:
              habana.ai/gaudi: 1
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: whisper-svc
  # Pinned to the same namespace as the Deployment it selects.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: whisper-deploy
  ports:
    - name: service
      port: 7066
      targetPort: 7066
|
||||
|
||||
---
|
||||
# TTS microservice: Deployment plus the ClusterIP Service exposing it.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tts-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: tts-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: tts-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: tts-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: audio-qna-config
          image: opea/tts:v0.9
          imagePullPolicy: IfNotPresent
          name: tts-deploy
          args: null
          ports:
            - containerPort: 9088
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: tts-svc
  # Pinned to the same namespace as the Deployment it selects.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: tts-deploy
  ports:
    - name: service
      port: 3002
      targetPort: 9088
|
||||
|
||||
---
|
||||
# Deployment "speecht5-deploy": one replica of the opea/speecht5-gaudi:v0.9
# image, listening on container port 7055 and limited to one
# habana.ai/gaudi device.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: default
  name: speecht5-deploy
spec:
  replicas: 1
  selector:
    matchLabels:
      app: speecht5-deploy
  template:
    metadata:
      labels:
        app: speecht5-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Soft constraint (ScheduleAnyway): spread pods with this app label
      # across hostnames, allowing a skew of at most 1.
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: speecht5-deploy
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      containers:
        - name: speecht5-deploy
          image: opea/speecht5-gaudi:v0.9
          imagePullPolicy: IfNotPresent
          # Explicit null leaves the args field unset.
          args: null
          # Shared settings come from the audio-qna-config ConfigMap; the
          # explicit env entries below are set directly on the container.
          envFrom:
            - configMapRef:
                name: audio-qna-config
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
          ports:
            - containerPort: 7055
          resources:
            limits:
              habana.ai/gaudi: 1
|
||||
---
|
||||
# ClusterIP Service "speecht5-svc": exposes port 7055 and forwards it to
# port 7055 on pods labelled app=speecht5-deploy.
apiVersion: v1
kind: Service
metadata:
  name: speecht5-svc
  # A Service only selects pods in its own namespace; speecht5-deploy declares
  # `namespace: default`, so the Service is pinned to the same namespace
  # instead of following the caller's current kubectl context.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: speecht5-deploy
  ports:
    - name: service
      port: 7055
      targetPort: 7055
|
||||
|
||||
---
|
||||
# Deployment "llm-dependency-deploy": one replica of the
# ghcr.io/huggingface/tgi-gaudi:2.0.1 image, listening on container port 80
# and limited to one habana.ai/gaudi device.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: default
  name: llm-dependency-deploy
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      labels:
        app: llm-dependency-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      volumes:
        # Host directory mounted at /data in the container; `type: Directory`
        # requires the path to already exist on the node.
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
      containers:
        - name: llm-dependency-deploy-demo
          image: ghcr.io/huggingface/tgi-gaudi:2.0.1
          securityContext:
            capabilities:
              add:
                - SYS_NICE
          # Flag/value pairs passed to the container; order is significant.
          # $(LLM_MODEL_ID) is a Kubernetes env-var reference, presumably
          # supplied by audio-qna-config; verify against the ConfigMap.
          args:
            - --model-id
            - $(LLM_MODEL_ID)
            - --max-input-length
            - '2048'
            - --max-total-tokens
            - '4096'
            - --max-batch-total-tokens
            - '65536'
            - --max-batch-prefill-tokens
            - '4096'
          envFrom:
            - configMapRef:
                name: audio-qna-config
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            - name: PREFILL_BATCH_BUCKET_SIZE
              value: '1'
            - name: BATCH_BUCKET_SIZE
              value: '8'
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
|
||||
---
|
||||
# ClusterIP Service "llm-dependency-svc": exposes port 3006 and forwards it
# to port 80 on pods labelled app=llm-dependency-deploy.
apiVersion: v1
kind: Service
metadata:
  name: llm-dependency-svc
  # A Service only selects pods in its own namespace; llm-dependency-deploy
  # declares `namespace: default`, so the Service is pinned to the same
  # namespace instead of following the caller's current kubectl context.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-dependency-deploy
  ports:
    - name: service
      port: 3006
      targetPort: 80
|
||||
|
||||
---
|
||||
# Deployment "llm-deploy": one replica of the opea/llm-tgi:v0.9 image,
# listening on container port 9000.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: default
  name: llm-deploy
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-deploy
  template:
    metadata:
      labels:
        app: llm-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Soft constraint (ScheduleAnyway): spread pods with this app label
      # across hostnames, allowing a skew of at most 1.
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: llm-deploy
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      containers:
        - name: llm-deploy
          image: opea/llm-tgi:v0.9
          imagePullPolicy: IfNotPresent
          # Explicit null leaves the args field unset.
          args: null
          # Environment is loaded from the shared audio-qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: audio-qna-config
          ports:
            - containerPort: 9000
|
||||
---
|
||||
# ClusterIP Service "llm-svc": exposes port 3007 and forwards it to
# port 9000 on pods labelled app=llm-deploy.
apiVersion: v1
kind: Service
metadata:
  name: llm-svc
  # A Service only selects pods in its own namespace; llm-deploy declares
  # `namespace: default`, so the Service is pinned to the same namespace
  # instead of following the caller's current kubectl context.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-deploy
  ports:
    - name: service
      port: 3007
      targetPort: 9000
|
||||
|
||||
---
|
||||
# Deployment "audioqna-backend-server-deploy": one replica of the
# opea/audioqna:v0.9 image, listening on container port 8888.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: default
  name: audioqna-backend-server-deploy
spec:
  replicas: 1
  selector:
    matchLabels:
      app: audioqna-backend-server-deploy
  template:
    metadata:
      labels:
        app: audioqna-backend-server-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Soft constraint (ScheduleAnyway): spread pods with this app label
      # across hostnames, allowing a skew of at most 1.
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: audioqna-backend-server-deploy
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      containers:
        - name: audioqna-backend-server-deploy
          image: opea/audioqna:v0.9
          imagePullPolicy: IfNotPresent
          # Explicit null leaves the args field unset.
          args: null
          # Environment is loaded from the shared audio-qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: audio-qna-config
          ports:
            - containerPort: 8888
|
||||
---
|
||||
# NodePort Service "audioqna-backend-server-svc": exposes port 3008 inside
# the cluster and node port 30666 on every node, both forwarding to port 8888
# on pods labelled app=audioqna-backend-server-deploy.
apiVersion: v1
kind: Service
metadata:
  name: audioqna-backend-server-svc
  # A Service only selects pods in its own namespace;
  # audioqna-backend-server-deploy declares `namespace: default`, so the
  # Service is pinned to the same namespace instead of following the caller's
  # current kubectl context.
  namespace: default
spec:
  type: NodePort
  selector:
    app: audioqna-backend-server-deploy
  ports:
    - name: service
      port: 3008
      targetPort: 8888
      nodePort: 30666
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user