Compare commits

..

58 Commits

Author SHA1 Message Date
xiguiw
a03feb700b Merge branch 'main' into update_vLLM 2025-05-16 11:18:10 +08:00
Zhu Yongbo
bb9ec6e5d2 fix EdgeCraftRAG UI image build bug (#1964)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
2025-05-16 10:06:46 +08:00
xiguiw
94222d5783 Merge branch 'main' into update_vLLM 2025-05-16 09:04:30 +08:00
CICD-at-OPEA
274af9eabc Update vLLM version to v0.9.0
Signed-off-by: CICD-at-OPEA <CICD@opea.dev>
2025-05-15 22:41:49 +00:00
Daniel De León
3fb59a9769 Update DocSum README and environment configuration (#1917)
Signed-off-by: Daniel Deleon <daniel.de.leon@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: Eero Tamminen <eero.t.tamminen@intel.com>
Co-authored-by: Zhenzhong Xu <zhenzhong.xu@intel.com>
2025-05-15 11:58:58 -07:00
chen, suyue
410df80925 [CICD enhance] AvatarChatbot run CI with latest base image, group logs in GHA outputs. (#1930)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-05-15 11:22:49 +08:00
chen, suyue
8eac02e58b [CICD enhance] DBQnA run CI with latest base image, group logs in GHA outputs. (#1931)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-05-14 17:12:09 +08:00
ZePan110
9f80a18cb5 Integrate GraphRAG set_env to ut scripts. (#1943)
Integrate GraphRAG set_env to ut scripts.
Add README.md for UT scripts.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-05-14 13:12:35 +08:00
ZePan110
f2c8e0b4ff Integrate DocIndexRetriever set_env to ut scripts. (#1945)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-05-14 13:00:51 +08:00
alexsin368
fb53c536a3 AgentQnA - add support for remote server (#1900)
Signed-off-by: alexsin368 <alex.sin@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
2025-05-14 11:12:57 +08:00
chen, suyue
26d07019d0 [CICD enhance] CodeTrans run CI with latest base image, group logs in GHA outputs. (#1929)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-05-14 11:11:54 +08:00
ZePan110
bd6726c53a Blocking link checks that require a login (#1946)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-05-14 10:57:16 +08:00
CICD-at-OPEA
238fb52a92 Update vLLM version to v0.8.5
Signed-off-by: CICD-at-OPEA <CICD@opea.dev>
2025-05-13 22:42:16 +00:00
Ying Hu
4a17638b5c Merge branch 'main' into update_vLLM 2025-05-13 16:00:56 +08:00
ZePan110
a0bdf8eab2 Add opea/vllm-rocm README.md link in docker_images_list.md (#1925)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-05-13 13:34:31 +08:00
ZePan110
99f2f940b6 Fix input check for helm test workflow (#1938)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-05-12 17:41:57 +08:00
Ying Hu
2596671d3f Update README.md for remove the docker installer (#1927)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-05-12 11:40:33 +08:00
Sun, Xuehao
7ffb4107e6 set fail-fast to false in vLLM update actions (#1926)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-05-12 11:30:29 +08:00
ZePan110
7590b055aa Integrate DBQnA set_env to ut scripts and enhanced validation checks. (#1915)
Integrate DBQnA set_env to ut scripts.
Add README.md for ut scripts.
Enhanced validation checks

Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-05-12 10:19:18 +08:00
Eero Tamminen
4efb1e0833 Update paths to GenAIInfra scripts (#1923)
Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
2025-05-10 21:57:52 +08:00
Razvan Liviu Varzaru
ebb7c24ca8 Add ChatQnA docker-compose example on Intel Xeon using MariaDB Vector (#1916)
Signed-off-by: Razvan-Liviu Varzaru <razvan@mariadb.org>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-05-08 21:08:15 -07:00
CICD-at-OPEA
2160d43a32 Update vLLM version to v0.8.5
Signed-off-by: CICD-at-OPEA <CICD@opea.dev>
2025-05-08 08:37:52 +00:00
Sun, Xuehao
bfefdfad34 Fix vllm version update workflow (#1919)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-05-08 16:36:37 +08:00
Sun, Xuehao
b467a13ec3 daily update vLLM&vLLM-fork version (#1914)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-05-08 10:34:36 +08:00
ZePan110
05011ebaac Integrate AudioQnA set_env to ut scripts. (#1897)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-05-08 09:14:44 +08:00
Melanie Hart Buehler
7bb05585b6 Move file processing from UI to DocSum backend service (#1899)
Signed-off-by: Melanie Buehler <melanie.h.buehler@intel.com>
2025-05-08 09:05:30 +08:00
Sun, Xuehao
f6013b8679 Add exempt-issue-labels to stale check workflow (#1861)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-05-07 11:35:37 +08:00
chen, suyue
505ec6d4b6 update PR reviewers (#1913)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-05-07 11:28:04 +08:00
lkk
ff66600ab4 Fix ui dockerfile. (#1909)
Signed-off-by: lkk <33276950+lkk12014402@users.noreply.github.com>
2025-05-06 16:34:16 +08:00
ZePan110
5375332fb3 Fix security issues for helm test workflow (#1908)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-05-06 15:54:43 +08:00
Omar Khleif
df33800945 CodeGen Gradio UI Enhancements (#1904)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
2025-05-06 13:41:21 +08:00
Ying Hu
40e44dfcd6 Update README.md of ChatQnA for broken URL (#1907)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Neo Zhang Jianyu <jianyu.zhang@intel.com>
2025-05-06 13:21:31 +08:00
ZePan110
9259ba41a5 Remove invalid codeowner. (#1896)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-30 13:24:42 +08:00
ZePan110
5c7f5718ed Restore context in EdgeCraftRAG build.yaml. (#1895)
Restore context in EdgeCraftRAG build.yaml to avoid the issue of can't find Dockerfiles.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-30 11:09:21 +08:00
lkk
d334f5c8fd build cpu agent ui docker image. (#1894) 2025-04-29 23:58:52 +08:00
ZePan110
670d9f3d18 Fix security issue. (#1892)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-29 19:44:48 +08:00
Zhu Yongbo
555c4100b3 Install cpu version for components (#1888)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
2025-04-29 10:08:23 +08:00
ZePan110
04d527d3b0 Integrate set_env to ut scripts for CodeTrans. (#1868)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-28 13:53:50 +08:00
ZePan110
13c4749ca3 Fix security issue (#1884)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-28 13:52:50 +08:00
ZePan110
99b62ae49e Integrate DocSum set_env to ut scripts. (#1860)
Integrate DocSum set_env to ut scripts.
Add README.md for DocSum and InstructionTuning UT scripts.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-28 13:35:05 +08:00
chen, suyue
c546d96e98 downgrade tei version from 1.6 to 1.5, fix the chatqna perf regression (#1886)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-25 23:00:36 +08:00
chen, suyue
be5933ad85 Update benchmark scripts (#1883)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-25 17:05:48 +08:00
rbrugaro
18b4f39f27 README fixes Finance Example (#1882)
Signed-off-by: Rita Brugarolas <rita.brugarolas.brufau@intel.com>
Co-authored-by: Ying Hu <ying.hu@intel.com>
2025-04-24 23:58:08 -07:00
chyundunovDatamonsters
ef9290f245 DocSum - refactoring README.md for deploy application on ROCm (#1881)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-25 13:36:40 +08:00
chyundunovDatamonsters
3b0bcb80a8 DocSum - Adding files to deploy an application in the K8S environment using Helm (#1758)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-25 13:33:08 +08:00
Artem Astafev
ccc145ea1a Refine README.MD for SearchQnA on AMD ROCm platform (#1876)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-25 10:16:03 +08:00
chyundunovDatamonsters
bb7a675665 ChatQnA - refactoring README.md for deploy application on ROCm (#1857)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-25 08:52:24 +08:00
chen, suyue
f90a6d2a8e [CICD enhance] EdgeCraftRAG run CI with latest base image, group logs in GHA outputs. (#1877)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-24 16:18:44 +08:00
chyundunovDatamonsters
1fdab591d9 CodeTrans - refactoring README.md for deploy application on ROCm with Docker Compose (#1875)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-24 15:28:57 +08:00
chen, suyue
13ea13862a Remove proxy in CodeTrans test (#1874)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-24 13:47:56 +08:00
ZePan110
1787d1ee98 Update image links. (#1866)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-24 13:34:41 +08:00
Artem Astafev
db4bf1a4c3 Refine README.MD for AMD ROCm docker compose deployment (#1856)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-24 11:00:51 +08:00
chen, suyue
f7002fcb70 Set opea_branch for CD test (#1870)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-24 09:49:20 +08:00
Artem Astafev
c39c875211 Fix compose file and functional tests for Avatarchatbot on AMD ROCm platform (#1872)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-23 22:58:25 +08:00
Artem Astafev
c2e9a259fe Refine AuidoQnA README.MD for AMD ROCm docker compose deployment (#1862)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-23 13:55:01 +08:00
Omar Khleif
48eaf9c1c9 Added CodeGen Gradio README link to Docker Images List (#1864)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2025-04-22 15:28:49 -07:00
Ervin Castelino
a39824f142 Update README.md of DBQnA (#1855)
Co-authored-by: Ying Hu <ying.hu@intel.com>
2025-04-22 15:56:37 -04:00
Dina Suehiro Jones
e10e6dd002 Fixes for MultimodalQnA with the Milvus vector db (#1859)
Signed-off-by: Dina Suehiro Jones <dina.s.jones@intel.com>
2025-04-21 16:05:11 -07:00
105 changed files with 2216 additions and 1005 deletions

.github/CODEOWNERS (vendored, 21 lines changed)

@@ -1,16 +1,18 @@
* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com
# Code owners will review PRs within their respective folders.
* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com
/.github/ suyue.chen@intel.com ze.pan@intel.com
/AgentQnA/ kaokao.lv@intel.com minmin.hou@intel.com
/AgentQnA/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com
/AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com
/AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com
/ChatQnA/ liang1.lv@intel.com letong.han@intel.com
/CodeGen/ liang1.lv@intel.com xinyao.wang@intel.com
/CodeTrans/ sihan.chen@intel.com xinyao.wang@intel.com
/CodeGen/ liang1.lv@intel.com
/CodeTrans/ sihan.chen@intel.com
/DBQnA/ supriya.krishnamurthi@intel.com liang1.lv@intel.com
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
/DocSum/ letong.han@intel.com xinyao.wang@intel.com
/DocIndexRetriever/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com chendi.xue@intel.com
/DocSum/ letong.han@intel.com
/EdgeCraftRAG/ yongbo.zhu@intel.com mingyuan.qi@intel.com
/FaqGen/ yogesh.pandey@intel.com xinyao.wang@intel.com
/FinanceAgent/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com
/GraphRAG/ rita.brugarolas.brufau@intel.com abolfazl.shahbazi@intel.com
/InstructionTuning/ xinyu.ye@intel.com kaokao.lv@intel.com
/MultimodalQnA/ melanie.h.buehler@intel.com tiep.le@intel.com
@@ -19,5 +21,6 @@
/SearchQnA/ sihan.chen@intel.com letong.han@intel.com
/Text2Image/ wenjiao.yue@intel.com xinyu.ye@intel.com
/Translation/ liang1.lv@intel.com sihan.chen@intel.com
/VideoQnA/ huiling.bao@intel.com xinyao.wang@intel.com
/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
/VideoQnA/ huiling.bao@intel.com
/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
/WorkflowExecAgent/ joshua.jian.ern.liew@intel.com kaokao.lv@intel.com

.github/env/_build_image.sh (vendored, new file, 5 lines)

@@ -0,0 +1,5 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export VLLM_VER=v0.9.0
export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
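These pinned versions are meant to be sourced by the build workflows instead of being hard-coded in each one. A minimal usage sketch, assuming the file is sourced from the repository root (it mirrors the workflow change shown below):

```bash
# Sketch only: pick up the shared version pins, then clone the matching tags.
source .github/env/_build_image.sh
git clone -b "${VLLM_VER}" --single-branch https://github.com/vllm-project/vllm.git
git clone -b "${VLLM_FORK_VER}" --single-branch https://github.com/HabanaAI/vllm-fork.git
```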


@@ -75,17 +75,12 @@ jobs:
run: |
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
source ${{ github.workspace }}/.github/env/_build_image.sh
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER=v0.8.3
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
git clone -b ${VLLM_VER} --single-branch https://github.com/vllm-project/vllm.git
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
git clone -b ${VLLM_FORK_VER} --single-branch https://github.com/HabanaAI/vllm-fork.git
fi
git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git rev-parse HEAD && cd ../


@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
name: Helm Chart E2e Test For Call
permissions: read-all
permissions:
contents: read
on:
workflow_call:
inputs:
@@ -135,16 +137,28 @@ jobs:
env:
example: ${{ inputs.example }}
run: |
CHART_NAME="${example,,}" # CodeGen
echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV
echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "NAMESPACE=${CHART_NAME}-$(head -c 4 /dev/urandom | xxd -p)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=false" >> $GITHUB_ENV
echo "CHART_FOLDER=${example}/kubernetes/helm" >> $GITHUB_ENV
if [[ ! "$example" =~ ^[a-zA-Z0-9]{1,20}$ ]] || [[ "$example" =~ \.\. ]] || [[ "$example" == -* || "$example" == *- ]]; then
echo "Error: Invalid input - only lowercase alphanumeric and internal hyphens allowed"
exit 1
fi
# SAFE_PREFIX="kb-"
CHART_NAME="${SAFE_PREFIX}$(echo "$example" | tr '[:upper:]' '[:lower:]')"
RAND_SUFFIX=$(openssl rand -hex 2 | tr -dc 'a-f0-9')
cat <<EOF >> $GITHUB_ENV
CHART_NAME=${CHART_NAME}
RELEASE_NAME=${CHART_NAME}-$(date +%s)
NAMESPACE=ns-${CHART_NAME}-${RAND_SUFFIX}
ROLLOUT_TIMEOUT_SECONDS=600s
TEST_TIMEOUT_SECONDS=600s
KUBECTL_TIMEOUT_SECONDS=60s
should_cleanup=false
skip_validate=false
CHART_FOLDER=${example}/kubernetes/helm
EOF
echo "Generated safe variables:" >> $GITHUB_STEP_SUMMARY
echo "- CHART_NAME: ${CHART_NAME}" >> $GITHUB_STEP_SUMMARY
- name: Helm install
id: install


@@ -0,0 +1,94 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Daily update vLLM & vLLM-fork version
on:
schedule:
- cron: "30 22 * * *"
workflow_dispatch:
env:
BRANCH_NAME: "update"
USER_NAME: "CICD-at-OPEA"
USER_EMAIL: "CICD@opea.dev"
jobs:
freeze-tag:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- repo: vLLM
repo_name: vllm-project/vllm
ver_name: VLLM_VER
- repo: vLLM-fork
repo_name: HabanaAI/vllm-fork
ver_name: VLLM_FORK_VER
fail-fast: false
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}
- name: Set up Git
run: |
git config --global user.name ${{ env.USER_NAME }}
git config --global user.email ${{ env.USER_EMAIL }}
git remote set-url origin https://${{ env.USER_NAME }}:"${{ secrets.ACTION_TOKEN }}"@github.com/${{ github.repository }}.git
git fetch
if git ls-remote https://github.com/${{ github.repository }}.git "refs/heads/${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | grep -q "refs/heads/${{ env.BRANCH_NAME }}_${{ matrix.repo }}"; then
echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} exists"
git checkout ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
else
echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} not exists"
git checkout -b ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
git push origin ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} created successfully"
fi
- name: Run script
run: |
latest_vllm_ver=$(curl -s "https://api.github.com/repos/${{ matrix.repo_name }}/tags" | jq '.[0].name' -)
latest_vllm_ver=$(echo "$latest_vllm_ver" | sed 's/"//g')
echo "latest_vllm_ver=${latest_vllm_ver}" >> "$GITHUB_ENV"
find . -type f -name "*.sh" -exec sed -i "s/${{ matrix.ver_name }}=.*/${{ matrix.ver_name }}=${latest_vllm_ver}/" {} \;
- name: Commit changes
run: |
git add .
if git diff-index --quiet HEAD --; then
echo "No changes detected, skipping commit."
exit 1
else
git commit -s -m "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
git push --set-upstream origin ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
fi
- name: Create Pull Request
env:
GH_TOKEN: ${{ secrets.ACTION_TOKEN }}
run: |
pr_count=$(curl -H "Authorization: token ${{ secrets.ACTION_TOKEN }}" -s "https://api.github.com/repos/${{ github.repository }}/pulls?state=all&head=${{ env.USER_NAME }}:${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | jq '. | length')
if [ $pr_count -gt 0 ]; then
echo "Pull Request exists"
pr_number=$(curl -H "Authorization: token ${{ secrets.ACTION_TOKEN }}" -s "https://api.github.com/repos/${{ github.repository }}/pulls?state=all&head=${{ env.USER_NAME }}:${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | jq '.[0].number')
gh pr edit ${pr_number} \
--title "Update ${{ matrix.repo }} version to ${latest_vllm_ver}" \
--body "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
echo "Pull Request updated successfully"
else
echo "Pull Request does not exists..."
gh pr create \
-B main \
-H ${{ env.BRANCH_NAME }}_${{ matrix.repo }} \
--title "Update ${{ matrix.repo }} version to ${latest_vllm_ver}" \
--body "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
echo "Pull Request created successfully"
fi


@@ -26,3 +26,4 @@ jobs:
close-pr-message: "This PR was closed because it has been stalled for 7 days with no activity."
repo-token: ${{ secrets.ACTION_TOKEN }}
start-date: "2025-03-01T00:00:00Z"
exempt-issue-labels: "Backlog"


@@ -19,6 +19,9 @@ concurrency:
jobs:
job1:
name: Get-Test-Matrix
permissions:
contents: read
pull-requests: read
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}


@@ -23,6 +23,7 @@ jobs:
- name: Check the Validity of Hyperlinks
run: |
cd ${{github.workspace}}
delay=15
fail="FALSE"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
@@ -35,15 +36,20 @@ jobs:
# echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
if [[ "$url" == "https://platform.openai.com/api-keys"* ]]; then
echo "Link "$url" from ${{github.workspace}}/$path needs to be verified by a real person."
else
sleep $delay
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
if [ "$response" -ne 200 ]; then
echo "**********Validation failed ($response), try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link ($response_retry) from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
fi
done


@@ -99,7 +99,7 @@ flowchart LR
#### First, clone the `GenAIExamples` repo.
```
```bash
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
@@ -109,7 +109,7 @@ git clone https://github.com/opea-project/GenAIExamples.git
##### For proxy environments only
```
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
@@ -118,14 +118,24 @@ export no_proxy="Your_No_Proxy"
##### For using open-source llms
```
Set up a [HuggingFace](https://huggingface.co/) account and generate a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
Then set an environment variable with the token and another for a directory to download the models:
```bash
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> # to avoid redownloading models
```
##### [Optional] OPANAI_API_KEY to use OpenAI models
##### [Optional] OPENAI_API_KEY to use OpenAI models or Intel® AI for Enterprise Inference
```
To use OpenAI models, generate a key following these [instructions](https://platform.openai.com/api-keys).
To use a remote server running Intel® AI for Enterprise Inference, contact the cloud service provider or owner of the on-prem machine for a key to access the desired model on the server.
Then set the environment variable `OPENAI_API_KEY` with the key contents:
```bash
export OPENAI_API_KEY=<your-openai-key>
```
@@ -133,16 +143,18 @@ export OPENAI_API_KEY=<your-openai-key>
##### Gaudi
```
```bash
source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
```
##### Xeon
```
```bash
source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
```
For running
### 2. Launch the multi-agent system. </br>
We make it convenient to launch the whole system with docker compose, which includes microservices for LLM, agents, UI, retrieval tool, vector database, dataprep, and telemetry. There are 3 docker compose files, which make it easy for users to pick and choose. Users can choose a different retrieval tool other than the `DocIndexRetriever` example provided in our GenAIExamples repo. Users can choose not to launch the telemetry containers.
@@ -184,14 +196,37 @@ docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/
#### Launch on Xeon
On Xeon, only OpenAI models are supported. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
On Xeon, OpenAI models and models deployed on a remote server are supported. Both methods require an API key.
```bash
export OPENAI_API_KEY=<your-openai-key>
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
```
##### OpenAI Models
The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
```bash
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml up -d
```
##### Models on Remote Server
When models are deployed on a remote server with Intel® AI for Enterprise Inference, a base URL and an API key are required to access them. To run the Agent microservice on Xeon while using models deployed on a remote server, add `compose_remote.yaml` to the `docker compose` command and set additional environment variables.
###### Notes
- `OPENAI_API_KEY` is already set in a previous step.
- `model` is used to overwrite the value set for this environment variable in `set_env.sh`.
- `LLM_ENDPOINT_URL` is the base URL given by the owner of the on-prem machine or cloud service provider. It follows the format `https://<DNS>`, for example `https://api.inference.example.com`.
```bash
export model=<name-of-model-card>
export LLM_ENDPOINT_URL=<http-endpoint-of-remote-server>
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml -f compose_remote.yaml up -d
```
### 3. Ingest Data into the vector database
The `run_ingest_data.sh` script will use an example jsonl file to ingest example documents into a vector database. Other ways to ingest data and other types of documents supported can be found in the OPEA dataprep microservice located in the opea-project/GenAIComps repo.
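A minimal invocation sketch, assuming the script lives under the AgentQnA retrieval tool directory (the exact path is not shown in this hunk):

```bash
# Sketch only: directory is assumed; adjust to where run_ingest_data.sh resides.
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash run_ingest_data.sh
```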
@@ -208,12 +243,18 @@ bash run_ingest_data.sh
The UI microservice is launched in the previous step with the other microservices.
To see the UI, open a web browser to `http://${ip_address}:5173` to access the UI. Note the `ip_address` here is the host IP of the UI microservice.
1. `create Admin Account` with a random value
2. add opea agent endpoint `http://$ip_address:9090/v1` which is a openai compatible api
1. Click on the arrow above `Get started`. Create an admin account with a name, email, and password.
2. Add an OpenAI-compatible API endpoint. In the upper right, click on the circle button with the user's initial, go to `Admin Settings`->`Connections`. Under `Manage OpenAI API Connections`, click on the `+` to add a connection. Fill in these fields:
- **URL**: `http://${ip_address}:9090/v1`, do not forget the `v1`
- **Key**: any value
- **Model IDs**: any name, e.g. `opea-agent`, then press `+` to add it
Click "Save".
![opea-agent-setting](assets/img/opea-agent-setting.png)
3. test opea agent with ui
3. Test OPEA agent with UI. Return to `New Chat` and ensure the model (e.g. `opea-agent`) is selected near the upper left. Enter any prompt to interact with the agent.
![opea-agent-test](assets/img/opea-agent-test.png)


@@ -0,0 +1,18 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
worker-rag-agent:
environment:
llm_endpoint_url: ${LLM_ENDPOINT_URL}
api_key: ${OPENAI_API_KEY}
worker-sql-agent:
environment:
llm_endpoint_url: ${LLM_ENDPOINT_URL}
api_key: ${OPENAI_API_KEY}
supervisor-react-agent:
environment:
llm_endpoint_url: ${LLM_ENDPOINT_URL}
api_key: ${OPENAI_API_KEY}


@@ -37,8 +37,8 @@ function build_agent_docker_image_gaudi_vllm() {
get_genai_comps
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build agent image with --no-cache..."
service_list="agent agent-ui vllm-gaudi"


@@ -6,8 +6,8 @@
# export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export host_ip="192.165.1.21"
export HUGGINGFACEHUB_API_TOKEN=${YOUR_HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# <token>
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3


@@ -6,8 +6,8 @@
# export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export host_ip=""
export external_host_ip=""
export host_ip=${ip_address}
export external_host_ip=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_CACHE_DIR="./data"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"


@@ -14,7 +14,8 @@ export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export GPT_SOVITS_SERVER_HOST_IP=${host_ip}
export GPT_SOVITS_SERVER_PORT=9880
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006

AudioQnA/tests/README.md (new file, 45 lines)

@@ -0,0 +1,45 @@
# AudioQnA E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_tgi_on_xeon.sh
```
On Intel Xeon with vLLM:
```bash
bash test_compose_on_xeon.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_tgi_on_gaudi.sh
```
On Intel Gaudi with vLLM:
```bash
bash test_compose_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```
On AMD ROCm with vLLM:
```bash
bash test_compose_vllm_on_rocm.sh
```


@@ -27,7 +27,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
@@ -40,21 +40,8 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
export MEGA_SERVICE_HOST_IP=${ip_address}
export WHISPER_SERVER_HOST_IP=${ip_address}
export GPT_SOVITS_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export WHISPER_SERVER_PORT=7066
export GPT_SOVITS_SERVER_PORT=9880
export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
export host_ip=${ip_address}
source set_env.sh
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers


@@ -27,9 +27,9 @@ function build_docker_images() {
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork/
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
echo "Check out vLLM tag ${VLLM_FORK_VER}"
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi"
@@ -40,24 +40,8 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
export MEGA_SERVICE_HOST_IP=${ip_address}
export WHISPER_SERVER_HOST_IP=${ip_address}
export SPEECHT5_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
export host_ip=${ip_address}
source set_env.sh
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers


@@ -35,20 +35,7 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export MEGA_SERVICE_HOST_IP=${ip_address}
export WHISPER_SERVER_HOST_IP=${ip_address}
export SPEECHT5_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
source set_env.sh
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0


@@ -27,7 +27,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
@@ -40,21 +40,8 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
export MEGA_SERVICE_HOST_IP=${ip_address}
export WHISPER_SERVER_HOST_IP=${ip_address}
export SPEECHT5_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
export host_ip=${ip_address}
source set_env.sh
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers


@@ -34,21 +34,8 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
export MEGA_SERVICE_HOST_IP=${ip_address}
export WHISPER_SERVER_HOST_IP=${ip_address}
export SPEECHT5_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
export host_ip=${ip_address}
source set_env.sh
# Start Docker Containers
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0


@@ -34,21 +34,8 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
export MEGA_SERVICE_HOST_IP=${ip_address}
export WHISPER_SERVER_HOST_IP=${ip_address}
export SPEECHT5_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
export host_ip=${ip_address}
source set_env.sh
# Start Docker Containers
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0


@@ -33,27 +33,7 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
export host_ip=${ip_address}
export external_host_ip=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_CACHE_DIR="./data"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export VLLM_SERVICE_PORT="8081"
export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=${VLLM_SERVICE_PORT}
export BACKEND_SERVICE_PORT=3008
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_ENDPOINT=http://${external_host_ip}:${BACKEND_SERVICE_PORT}/v1/audioqna
source set_env_vllm.sh
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers


@@ -1,8 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG IMAGE_REPO=opea
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
FROM $IMAGE_REPO/comps-base:$BASE_TAG
COPY ./avatarchatbot.py $HOME/avatarchatbot.py


@@ -5,6 +5,8 @@ services:
avatarchatbot:
build:
args:
IMAGE_REPO: ${REGISTRY:-opea}
BASE_TAG: ${TAG:-latest}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}


@@ -24,19 +24,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
@@ -128,19 +122,29 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo y | docker builder prune --all
echo y | docker image prune
echo "::endgroup::"
docker builder prune --all -f
docker image prune -f
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices
validate_megaservice
# validate_frontend
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker builder prune --all
echo y | docker image prune
echo "::endgroup::"
docker builder prune --all -f
docker image prune -f
}


@@ -25,6 +25,10 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="avatarchatbot whisper asr speecht5 tts wav2lip animation"
@@ -138,11 +142,6 @@ function validate_megaservice() {
}
#function validate_frontend() {
#}
function stop_docker() {
cd $WORKPATH/docker_compose/amd/gpu/rocm
docker compose down && docker compose rm -f
@@ -151,19 +150,27 @@ function stop_docker() {
function main() {
echo $OPENAI_API_KEY
echo $OPENAI_KEY
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices
sleep 30
validate_megaservice
# validate_frontend
stop_docker
echo "::endgroup::"
echo y | docker system prune
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
docker system prune -f
}


@@ -24,19 +24,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="avatarchatbot whisper speecht5 wav2lip animation"
@@ -127,16 +121,28 @@ function stop_docker() {
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker builder prune --all
echo y | docker image prune
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
docker system prune -f
}


@@ -96,20 +96,21 @@ flowchart LR
The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware.
| Category | Deployment Option | Description |
| ----------------------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| On-premise Deployments | Docker compose | [ChatQnA deployment on Xeon](./docker_compose/intel/cpu/xeon) |
| | | [ChatQnA deployment on AI PC](./docker_compose/intel/cpu/aipc) |
| | | [ChatQnA deployment on Gaudi](./docker_compose/intel/hpu/gaudi) |
| | | [ChatQnA deployment on Nvidia GPU](./docker_compose/nvidia/gpu) |
| | | [ChatQnA deployment on AMD ROCm](./docker_compose/amd/gpu/rocm) |
| | Kubernetes | [Helm Charts](./kubernetes/helm) |
| Cloud Service Providers | AWS | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using meta-llama/Meta-Llama-3-8B-Instruct ](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
| | | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using TII Falcon2-11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) |
| | GCP | [Terraform deployment on 5th Gen Intel Xeon with Intel AMX(support Confidential AI by using Intel® TDX](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
| | Azure | [Terraform deployment on 4th/5th Gen Intel Xeon with Intel AMX & Intel TDX](https://github.com/intel/terraform-intel-azure-linux-vm/tree/main/examples/azure-gen-ai-xeon-opea-chatqna-tdx) |
| | Intel Tiber AI Cloud | Coming Soon |
| | Any Xeon based Ubuntu system | [ChatQnA Ansible Module for Ubuntu 20.04](https://github.com/intel/optimized-cloud-recipes/tree/main/recipes/ai-opea-chatqna-xeon) .Use this if you are not using Terraform and have provisioned your system either manually or with another tool, including directly on bare metal. |
| Category | Deployment Option | Description |
| ------------------------------------------------------------------------------------------------------------------------------ | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| On-premise Deployments | Docker compose | [ChatQnA deployment on Xeon](./docker_compose/intel/cpu/xeon/README.md) |
| | | [ChatQnA deployment on AI PC](./docker_compose/intel/cpu/aipc/README.md) |
| | | [ChatQnA deployment on Gaudi](./docker_compose/intel/hpu/gaudi/README.md) |
| | | [ChatQnA deployment on Nvidia GPU](./docker_compose/nvidia/gpu/README.md) |
| | | [ChatQnA deployment on AMD ROCm](./docker_compose/amd/gpu/rocm/README.md) |
| Cloud Platforms Deployment on AWS, GCP, Azure, IBM Cloud,Oracle Cloud, [Intel® Tiber™ AI Cloud](https://ai.cloud.intel.com/) | Docker Compose | [Getting Started Guide: Deploy the ChatQnA application across multiple cloud platforms](https://github.com/opea-project/docs/tree/main/getting-started/README.md) |
| | Kubernetes | [Helm Charts](./kubernetes/helm/README.md) |
| Automated Terraform Deployment on Cloud Service Providers | AWS | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using meta-llama/Meta-Llama-3-8B-Instruct ](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
| | | [Terraform deployment on 4th Gen Intel Xeon with Intel AMX using TII Falcon2-11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) |
| | GCP | [Terraform deployment on 5th Gen Intel Xeon with Intel AMX(support Confidential AI by using Intel® TDX](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) |
| | Azure | [Terraform deployment on 4th/5th Gen Intel Xeon with Intel AMX & Intel TDX](https://github.com/intel/terraform-intel-azure-linux-vm/tree/main/examples/azure-gen-ai-xeon-opea-chatqna-tdx) |
| | Intel Tiber AI Cloud | Coming Soon |
| | Any Xeon based Ubuntu system | [ChatQnA Ansible Module for Ubuntu 20.04](https://github.com/intel/optimized-cloud-recipes/tree/main/recipes/ai-opea-chatqna-xeon). Use this if you are not using Terraform and have provisioned your system either manually or with another tool, including directly on bare metal. |
## Monitor and Tracing


@@ -1,35 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Update the package index
sudo apt-get -y update
# Install prerequisites
sudo apt-get -y install ca-certificates curl
# Create the directory for the Docker GPG key
sudo install -m 0755 -d /etc/apt/keyrings
# Add Docker's official GPG key
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
# Set permissions for the GPG key
sudo chmod a+r /etc/apt/keyrings/docker.asc
# Add Docker repository to the sources list
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# Update the package index with Docker packages
sudo apt-get -y update
# Install Docker packages
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# add existing user
sudo usermod -aG docker $USER
# Optional: Verify that Docker is installed correctly
sudo docker --version


@@ -156,6 +156,7 @@ In the context of deploying a ChatQnA pipeline on an Intel® Xeon® platform, we
| [compose_faqgen_tgi.yaml](./compose_faqgen_tgi.yaml) | Enables FAQ generation using TGI as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). |
| [compose.telemetry.yaml](./compose.telemetry.yaml) | Helper file for telemetry features for vllm. Can be used along with any compose files that serves vllm |
| [compose_tgi.telemetry.yaml](./compose_tgi.telemetry.yaml) | Helper file for telemetry features for tgi. Can be used along with any compose files that serves tgi |
| [compose_mariadb.yaml](./compose_mariadb.yaml) | Uses MariaDB Server as the vector database. All other configurations remain the same as the default |
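As a usage illustration, the MariaDB option added above is launched with its own compose file; the same command appears in the new README included later in this diff:

```bash
# Sketch: bring up ChatQnA on Xeon with MariaDB Vector as the vector database.
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose_mariadb.yaml up -d
```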
## ChatQnA with Conversational UI (Optional)


@@ -0,0 +1,259 @@
# Deploying ChatQnA with MariaDB Vector on Intel® Xeon® Processors
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel® Xeon® servers. The pipeline integrates **MariaDB Vector** as the vector database and includes microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
---
## Table of Contents
1. [Build Docker Images](#build-docker-images)
2. [Validate Microservices](#validate-microservices)
3. [Launch the UI](#launch-the-ui)
4. [Launch the Conversational UI (Optional)](#launch-the-conversational-ui-optional)
---
## Build Docker Images
First, clone the GenAIComps repository and build the required Docker images locally.
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 1. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile .
```
### 2. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
cd ..
```
### 3. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
cd ../..
```
### 4. Build UI Docker Image
Build the frontend Docker image with the command below:
```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../..
```
### 5. Build Conversational React UI Docker Image (Optional)
Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice using the command below:
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
```bash
cd GenAIExamples/ChatQnA/ui
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep/ingest"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
cd ../../..
```
### 6. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile .
```
Then run `docker images`; you should see the following 5 Docker images:
1. `opea/dataprep:latest`
2. `opea/retriever:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`
## Start Microservices
### Required Models
By default, the embedding, reranking, and LLM models are set as listed below:
| Service | Model |
| --------- | ----------------------------------- |
| Embedding | BAAI/bge-base-en-v1.5 |
| Reranking | BAAI/bge-reranker-base |
| LLM | meta-llama/Meta-Llama-3-8B-Instruct |
Change the `xxx_MODEL_ID` below for your needs.
### Setup Environment Variables
Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
> Replace `External_Public_IP` below with the actual IPv4 value
```bash
export host_ip="External_Public_IP"
```
> Change to your actual Huggingface API Token value
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
```bash
export no_proxy=${your_no_proxy},chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-mariadb-vector,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service
```
```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export MARIADB_DATABASE="vectordb"
export MARIADB_USER="chatqna"
export MARIADB_PASSWORD="password"
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
### Start all the services Docker Containers
> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose_mariadb.yaml up -d
```
### Validate Microservices
Follow the instructions to validate MicroServices.
For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md).
1. TEI Embedding Service
```bash
curl ${host_ip}:6040/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
2. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model.
Here we use `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
Check the vector dimension of your embedding model and set the `your_embedding` dimension to match.
```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:6045/v1/retrieval \
-X POST \
-d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
-H 'Content-Type: application/json'
```
3. TEI Reranking Service
```bash
curl http://${host_ip}:6041/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```
4. LLM Backend Service
On first startup, this service takes extra time to download, load, and warm up the model. Once that finishes, the service is ready.
Try the command below to check whether the LLM service is ready.
```bash
docker logs vllm-service 2>&1 | grep complete
```
If the service is ready, you will see a response like the one below.
```text
INFO: Application startup complete.
```
Then try the `cURL` command below to validate vLLM service.
```bash
curl http://${host_ip}:6042/v1/chat/completions \
-X POST \
-d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
-H 'Content-Type: application/json'
```
5. MegaService
```bash
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
"messages": "What is the revenue of Nike in 2023?"
}'
```
6. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:
Update Knowledge Base via Local File Upload:
```bash
curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \
-F "files=@./your_file.pdf"
```
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
Add Knowledge Base via HTTP Links:
```bash
curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]'
```
## Launch the UI
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
```yaml
chatqna-xeon-ui-server:
image: opea/chatqna-ui:latest
...
ports:
- "80:5173"
```
![project-screenshot](../../../../assets/img/chat_ui_init.png)
Here is an example of running ChatQnA:
![project-screenshot](../../../../assets/img/chat_ui_response.png)


@@ -0,0 +1,185 @@
# Copyright (C) 2025 MariaDB Foundation
# SPDX-License-Identifier: Apache-2.0
services:
mariadb-server:
image: mariadb:latest
container_name: mariadb-server
ports:
- "3306:3306"
environment:
- MARIADB_DATABASE=${MARIADB_DATABASE}
- MARIADB_USER=${MARIADB_USER}
- MARIADB_PASSWORD=${MARIADB_PASSWORD}
- MARIADB_RANDOM_ROOT_PASSWORD=1
healthcheck:
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
start_period: 10s
interval: 10s
timeout: 5s
retries: 3
dataprep-mariadb-vector:
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-mariadb-vector
depends_on:
mariadb-server:
condition: service_healthy
tei-embedding-service:
condition: service_started
ports:
- "6007:5000"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR"
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
timeout: 5s
retries: 50
restart: unless-stopped
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
container_name: retriever-mariadb-vector
depends_on:
mariadb-server:
condition: service_healthy
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MARIADBVECTOR"
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: vllm-service
ports:
- "9009:80"
volumes:
- "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
shm_size: 128g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
VLLM_CPU_KVCACHE_SPACE: 40
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
chatqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
mariadb-server:
condition: service_healthy
dataprep-mariadb-vector:
condition: service_healthy
tei-embedding-service:
condition: service_started
retriever:
condition: service_started
tei-reranking-service:
condition: service_started
vllm-service:
condition: service_healthy
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-service
- LLM_SERVER_PORT=80
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chatqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chatqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-xeon-nginx-server
depends_on:
- chatqna-xeon-backend-server
- chatqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-mariadb-vector
- DATAPREP_SERVICE_PORT=5000
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Copyright (C) 2025 MariaDB Foundation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
fi
export host_ip=$(hostname -I | awk '{print $1}')
export MARIADB_DATABASE="vectordb"
export MARIADB_USER="chatqna"
export MARIADB_PASSWORD="password"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export LOGFLAG=""
export no_proxy="$no_proxy,chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,jaeger,prometheus,grafana,node-exporter"
export LLM_SERVER_PORT=9000
export NGINX_PORT=80

View File

@@ -24,8 +24,8 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever llm-faqgen vllm-gaudi nginx"

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../

View File

@@ -24,8 +24,8 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi guardrails nginx"

View File

@@ -0,0 +1,176 @@
#!/bin/bash
# Copyright (C) 2025 MariaDB Foundation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# make sure NOT to change the pwd
cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export MARIADB_DATABASE="vectordb"
export MARIADB_USER="chatqna"
export MARIADB_PASSWORD="test"
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
# Start Docker Containers
docker compose -f compose_mariadb.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
sleep 3m
# tei for embedding service
validate_service \
"${ip_address}:6006/embed" \
"\[\[" \
"tei-embedding" \
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
# retrieval microservice
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
validate_service \
"${ip_address}:7000/v1/retrieval" \
" " \
"retrieval" \
"retriever-mariadb-vector" \
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
# tei for rerank microservice
validate_service \
"${ip_address}:8808/rerank" \
'{"index":1,"score":' \
"tei-rerank" \
"tei-reranking-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# vllm for llm service
validate_service \
"${ip_address}:9009/v1/chat/completions" \
"content" \
"vllm-llm" \
"vllm-service" \
'{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_service \
"${ip_address}:8888/v1/chatqna" \
"Nike" \
"mega-chatqna" \
"chatqna-xeon-backend-server" \
'{"messages": "What is the revenue of Nike in 2023?"}'
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose down
}
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
docker system prune -f
}
main

View File

@@ -25,7 +25,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# make sure NOT change the pwd

View File

@@ -24,8 +24,8 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# make sure NOT change the pwd

View File

@@ -25,7 +25,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# Not change the pwd

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# Not change the pwd

View File

@@ -24,8 +24,8 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"

View File

@@ -25,7 +25,7 @@ function build_docker_images() {
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# Not change the pwd

View File

@@ -43,9 +43,9 @@ function build_docker_images() {
# Download Gaudi vllm of latest tag
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
echo "Check out vLLM tag ${VLLM_FORK_VER}"
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codegen codegen-gradio-ui llm-textgen vllm-gaudi dataprep retriever embedding"

View File

@@ -42,7 +42,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
cd ../

View File

@@ -46,13 +46,24 @@ dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices"
# Define the functions that will be used in the app
def add_to_history(prompt, history):
history.append([prompt["text"], ""])
return history, ""
def conversation_history(prompt, index, use_agent, history):
print(f"Generating code for prompt: {prompt} using index: {index} and use_agent is {use_agent}")
history.append([prompt, ""])
response_generator = generate_code(prompt, index, use_agent)
history = add_to_history(prompt, history)[0]
response_generator = generate_code(prompt["text"], index, use_agent)
for token in response_generator:
history[-1][-1] += token
yield history
yield history, ""
def clear_history():
return ""
def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100):
@@ -287,19 +298,32 @@ def get_file_names(files):
# Define UI components
with gr.Blocks() as ui:
with gr.Tab("Code Generation"):
gr.Markdown("### Generate Code from Natural Language")
chatbot = gr.Chatbot(label="Chat History")
prompt_input = gr.Textbox(label="Enter your query")
with gr.Column():
with gr.Row(equal_height=True):
with gr.Row():
with gr.Column(scale=2):
database_dropdown = gr.Dropdown(choices=get_indices(), label="Select Index", value="None", scale=10)
db_refresh_button = gr.Button("Refresh Dropdown", scale=0.1)
db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown)
use_agent = gr.Checkbox(label="Use Agent", container=False)
generate_button = gr.Button("Generate Code")
generate_button.click(
conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot
with gr.Column(scale=9):
gr.Markdown("### Generate Code from Natural Language")
chatbot = gr.Chatbot(label="Chat History")
with gr.Row(equal_height=True):
with gr.Column(scale=8):
prompt_input = gr.MultimodalTextbox(
show_label=False, interactive=True, placeholder="Enter your query", sources=[]
)
with gr.Column(scale=1, min_width=150):
with gr.Row(elem_id="buttons") as button_row:
clear_btn = gr.Button(value="🗑️ Clear", interactive=True)
clear_btn.click(clear_history, None, chatbot)
prompt_input.submit(add_to_history, inputs=[prompt_input, chatbot], outputs=[chatbot, prompt_input])
prompt_input.submit(
conversation_history,
inputs=[prompt_input, database_dropdown, use_agent, chatbot],
outputs=[chatbot, prompt_input],
)
with gr.Tab("Resource Management"):
@@ -315,7 +339,7 @@ with gr.Blocks() as ui:
)
with gr.Column(scale=3):
file_upload = gr.File(label="Upload Files", file_count="multiple")
url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)")
url_input = gr.Textbox(label="Media to be ingested. Append URL's in a new line (Shift + Enter)")
upload_button = gr.Button("Upload", variant="primary")
upload_status = gr.Textbox(label="Upload Status")
file_upload.change(get_file_names, inputs=file_upload, outputs=url_input)

View File

@@ -1,8 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG IMAGE_REPO=opea
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
FROM $IMAGE_REPO/comps-base:$BASE_TAG
COPY ./code_translation.py $HOME/code_translation.py

View File

@@ -8,14 +8,14 @@
# which can be used to connect to the server from the Internet. It must be specified in the EXTERNAL_HOST_IP variable.
# If the server is used only on the internal network or has a direct external address,
# specify it in HOST_IP and in EXTERNAL_HOST_IP.
export HOST_IP=''
export EXTERNAL_HOST_IP=''
export HOST_IP=${ip_address}
export EXTERNAL_HOST_IP=${ip_address}
### Model ID
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
### The port of the TGI service. On this port, the TGI service will accept connections
export CODETRANS_TGI_SERVICE_PORT=18156
export CODETRANS_TGI_SERVICE_PORT=8008
### The endpoint of the TGI service to which requests to this service will be sent (formed from previously set variables)
export CODETRANS_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_TGI_SERVICE_PORT}"
@@ -24,7 +24,7 @@ export CODETRANS_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_TGI_SERVICE_POR
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
### The port of the LLM service. On this port, the LLM service will accept connections
export CODETRANS_LLM_SERVICE_PORT=18157
export CODETRANS_LLM_SERVICE_PORT=9000
### The IP address or domain name of the server for CodeTrans MegaService
export CODETRANS_MEGA_SERVICE_HOST_IP=${HOST_IP}
@@ -36,7 +36,7 @@ export CODETRANS_LLM_SERVICE_HOST_IP=${HOST_IP}
export CODETRANS_FRONTEND_SERVICE_IP=${HOST_IP}
### The port of the frontend service
export CODETRANS_FRONTEND_SERVICE_PORT=18155
export CODETRANS_FRONTEND_SERVICE_PORT=5173
### Name of GenAI service for route requests to application
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
@@ -45,10 +45,10 @@ export CODETRANS_BACKEND_SERVICE_NAME=codetrans
export CODETRANS_BACKEND_SERVICE_IP=${HOST_IP}
### The port of the backend service
export CODETRANS_BACKEND_SERVICE_PORT=18154
export CODETRANS_BACKEND_SERVICE_PORT=7777
### The port of the Nginx reverse proxy for application
export CODETRANS_NGINX_PORT=18153
export CODETRANS_NGINX_PORT=8088
### Endpoint of the backend service
export CODETRANS_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"

View File

@@ -8,14 +8,14 @@
# which can be used to connect to the server from the Internet. It must be specified in the EXTERNAL_HOST_IP variable.
# If the server is used only on the internal network or has a direct external address,
# specify it in HOST_IP and in EXTERNAL_HOST_IP.
export HOST_IP=''
export EXTERNAL_HOST_IP=''
export HOST_IP=${ip_address}
export EXTERNAL_HOST_IP=${ip_address}
### Model ID
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
### The port of the TGI service. On this port, the TGI service will accept connections
export CODETRANS_VLLM_SERVICE_PORT=18156
export CODETRANS_VLLM_SERVICE_PORT=8008
### The endpoint of the TGI service to which requests to this service will be sent (formed from previously set variables)
export CODETRANS_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_VLLM_SERVICE_PORT}"
@@ -24,7 +24,7 @@ export CODETRANS_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_VLLM_SERVICE_PORT}"
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
### The port of the LLM service. On this port, the LLM service will accept connections
export CODETRANS_LLM_SERVICE_PORT=18157
export CODETRANS_LLM_SERVICE_PORT=9000
### The IP address or domain name of the server for CodeTrans MegaService
export CODETRANS_MEGA_SERVICE_HOST_IP=${HOST_IP}
@@ -36,7 +36,7 @@ export CODETRANS_LLM_SERVICE_HOST_IP=${HOST_IP}
export CODETRANS_FRONTEND_SERVICE_IP=${HOST_IP}
### The port of the frontend service
export CODETRANS_FRONTEND_SERVICE_PORT=18155
export CODETRANS_FRONTEND_SERVICE_PORT=5173
### Name of GenAI service for route requests to application
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
@@ -45,10 +45,10 @@ export CODETRANS_BACKEND_SERVICE_NAME=codetrans
export CODETRANS_BACKEND_SERVICE_IP=${HOST_IP}
### The port of the backend service
export CODETRANS_BACKEND_SERVICE_PORT=18154
export CODETRANS_BACKEND_SERVICE_PORT=7777
### The port of the Nginx reverse proxy for application
export CODETRANS_NGINX_PORT=18153
export CODETRANS_NGINX_PORT=8088
### Endpoint of the backend service
export CODETRANS_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"

View File

@@ -5,6 +5,8 @@ services:
codetrans:
build:
args:
IMAGE_REPO: ${REGISTRY:-opea}
BASE_TAG: ${TAG:-latest}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
@@ -45,4 +47,5 @@ services:
build:
context: GenAIComps
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
extends: codetrans
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}

CodeTrans/tests/README.md Normal file
View File

@@ -0,0 +1,45 @@
# CodeTrans E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_tgi_on_xeon.sh
```
On Intel Xeon with vLLM:
```bash
bash test_compose_on_xeon.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_tgi_on_gaudi.sh
```
On Intel Gaudi with vLLM:
```bash
bash test_compose_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```
On AMD ROCm with vLLM:
```bash
bash test_compose_vllm_on_rocm.sh
```

View File

@@ -17,22 +17,17 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen vllm-gaudi nginx"
@@ -42,25 +37,12 @@ function build_docker_images() {
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
cd $WORKPATH/docker_compose
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=codetrans
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=7777
export NGINX_PORT=80
export host_ip=${ip_address}
source set_env.sh
cd intel/hpu/gaudi
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -173,17 +155,35 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
validate_megaservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -18,19 +18,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen nginx"
@@ -42,21 +36,7 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
export CODETRANS_TGI_SERVICE_PORT=8008
export CODETRANS_LLM_SERVICE_PORT=9000
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export CODETRANS_TGI_LLM_ENDPOINT="http://${ip_address}:${CODETRANS_TGI_SERVICE_PORT}"
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODETRANS_MEGA_SERVICE_HOST_IP=${ip_address}
export CODETRANS_LLM_SERVICE_HOST_IP=${ip_address}
export CODETRANS_FRONTEND_SERVICE_IP=${ip_address}
export CODETRANS_FRONTEND_SERVICE_PORT=5173
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
export CODETRANS_BACKEND_SERVICE_IP=${ip_address}
export CODETRANS_BACKEND_SERVICE_PORT=7777
export CODETRANS_NGINX_PORT=8088
export CODETRANS_BACKEND_SERVICE_URL="http://${ip_address}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"
export HOST_IP=${ip_address}
source set_env.sh
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -175,17 +155,35 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
validate_megaservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -17,21 +17,16 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
cd ../
@@ -44,21 +39,13 @@ function build_docker_images() {
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
cd $WORKPATH/docker_compose
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=codetrans
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=7777
export NGINX_PORT=80
export host_ip=${ip_address}
source set_env.sh
cd intel/cpu/xeon/
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -171,17 +158,35 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
validate_megaservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -17,19 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen nginx"
@@ -40,21 +34,13 @@ function build_docker_images() {
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
cd $WORKPATH/docker_compose
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=codetrans
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=7777
export NGINX_PORT=80
export host_ip=${ip_address}
source set_env.sh
cd intel/hpu/gaudi/
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -175,17 +161,35 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
validate_megaservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -17,19 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen nginx"
@@ -40,21 +34,13 @@ function build_docker_images() {
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
cd $WORKPATH/docker_compose
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=codetrans
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=7777
export NGINX_PORT=80
export host_ip=${ip_address}
source set_env.sh
cd intel/cpu/xeon/
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -175,17 +161,35 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
validate_megaservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -17,19 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
echo "GenAIComps test commit is $(git rev-parse HEAD)"
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="vllm-rocm llm-textgen codetrans codetrans-ui nginx"
@@ -40,22 +34,7 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
export HOST_IP=${ip_address}
export CODETRANS_VLLM_SERVICE_PORT=8008
export CODETRANS_LLM_SERVICE_PORT=9000
export CODETRANS_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export CODETRANS_LLM_ENDPOINT="http://${ip_address}:${CODETRANS_VLLM_SERVICE_PORT}"
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODETRANS_MEGA_SERVICE_HOST_IP=${ip_address}
export CODETRANS_LLM_SERVICE_HOST_IP=${ip_address}
export CODETRANS_FRONTEND_SERVICE_IP=${ip_address}
export CODETRANS_FRONTEND_SERVICE_PORT=5173
export CODETRANS_BACKEND_SERVICE_NAME=codetrans
export CODETRANS_BACKEND_SERVICE_IP=${ip_address}
export CODETRANS_BACKEND_SERVICE_PORT=7777
export CODETRANS_NGINX_PORT=8088
export CODETRANS_BACKEND_SERVICE_URL="http://${ip_address}:${CODETRANS_BACKEND_SERVICE_PORT}/v1/codetrans"
export HOST_IP=${ip_address}
source set_env_vllm.sh
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -175,17 +154,35 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservices"
validate_microservices
validate_megaservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_megaservice"
validate_megaservice
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -3,8 +3,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export host_ip=""
export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
pushd "../../" > /dev/null
ls -l
source .set_env.sh
popd > /dev/null
export host_ip=${ip_address}
export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DBQNA_TGI_SERVICE_PORT=8008
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
@@ -14,3 +19,4 @@ export POSTGRES_PASSWORD="testpwd"
export POSTGRES_DB="chinook"
export DBQNA_TEXT_TO_SQL_PORT=9090
export DBQNA_UI_PORT=5174
export build_texttosql_url="${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1"

DBQnA/docker_compose/intel/cpu/xeon/set_env.sh Normal file → Executable file
View File

@@ -2,26 +2,19 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
#export host_ip=$(hostname -I | awk '{print $1}')
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
fi
if [ -z "${host_ip}" ]; then
echo "Error: host_ip is not set. Please set host_ip first."
fi
export host_ip=${ip_address}
export no_proxy=$no_proxy,$host_ip,dbqna-xeon-react-ui-server,text2sql-service,tgi-service,postgres-container
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TGI_PORT=8008
export TGI_LLM_ENDPOINT="http://${host_ip}:${TGI_PORT}"
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=testpwd
export POSTGRES_DB=chinook
export TGI_PORT=8008
export TEXT2SQL_PORT=9090
"set_env.sh" 27L, 974B
export TGI_LLM_ENDPOINT="http://${host_ip}:${TGI_PORT}"
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null
export TGI_PORT=8008
export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"

View File

@@ -7,6 +7,8 @@ services:
context: GenAIComps
dockerfile: comps/text2sql/src/Dockerfile
args:
IMAGE_REPO: ${REGISTRY:-opea}
BASE_TAG: ${TAG:-latest}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
@@ -16,8 +18,6 @@ services:
context: ../ui
dockerfile: ./docker/Dockerfile.react
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
texttosql_url: ${build_texttosql_url}
extends: text2sql
image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}

DBQnA/tests/README.md Normal file
View File

@@ -0,0 +1,21 @@
# DBQnA E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_on_xeon.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```

View File

@@ -4,24 +4,20 @@
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
tgi_port=8008
tgi_volume=$WORKPATH/data
export host_ip=${ip_address}
export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DBQNA_TGI_SERVICE_PORT=8008
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export MODEL_ID=${DBQNA_LLM_MODEL_ID}
export POSTGRES_USER="postgres"
export POSTGRES_PASSWORD="testpwd"
export POSTGRES_DB="chinook"
export DBQNA_TEXT_TO_SQL_PORT=9090
export DBQNA_UI_PORT=5174
export build_texttosql_url="${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1"
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env.sh
export MODEL_CACHE=${model_cache:-"/var/lib/GenAI/data"}
function build_docker_images() {
@@ -36,7 +32,7 @@ function build_docker_images() {
docker images && sleep 1s
}
function start_service() {
function start_services() {
cd "$WORKPATH"/docker_compose/amd/gpu/rocm
# Start Docker Containers
docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
@@ -57,7 +53,8 @@ function validate_microservice() {
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
-H 'Content-Type: application/json')
if [[ $result == *"output"* ]]; then
if echo "$result" | jq -e '.result.output' > /dev/null 2>&1; then
# if [[ $result == *"output"* ]]; then
echo $result
echo "Result correct."
else
@@ -105,16 +102,31 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
build_docker_images
start_service
sleep 10s
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservice"
validate_microservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -15,7 +15,6 @@ export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
tgi_port=8008
function build_docker_images() {
cd $WORKPATH/docker_image_build
@@ -28,16 +27,9 @@ function build_docker_images() {
docker images && sleep 1s
}
function start_service() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export model="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_MODEL_ID=${model}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=testpwd
export POSTGRES_DB=chinook
export TEXT2SQL_PORT=9090
export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_port}"
source ./set_env.sh
# Start Docker Containers
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -60,7 +52,8 @@ function validate_microservice() {
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
-H 'Content-Type: application/json')
if [[ $result == *"output"* ]]; then
if echo "$result" | jq -e '.result.output' > /dev/null 2>&1; then
# if [[ $result == *"output"* ]]; then
echo $result
echo "Result correct."
else
@@ -108,16 +101,31 @@ function stop_docker() {
function main() {
echo "::group::stop_docker"
stop_docker
echo "::endgroup::"
build_docker_images
start_service
echo "::group::build_docker_images"
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
echo "::endgroup::"
echo "::group::start_services"
start_services
echo "::endgroup::"
echo "::group::validate_microservice"
validate_microservice
validate_frontend
echo "::endgroup::"
echo "::group::validate_frontend"
validate_frontend
echo "::endgroup::"
echo "::group::stop_docker"
stop_docker
echo y | docker system prune
echo "::endgroup::"
docker system prune -f
}

View File

@@ -5,3 +5,25 @@
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
ip_address=$(hostname -I | awk '{print $1}')
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export host_ip=${ip_address}
export RERANK_TYPE="tei"
export LOGFLAG=true
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export MILVUS_HOST=${ip_address}
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"

View File

@@ -5,3 +5,26 @@
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
ip_address=$(hostname -I | awk '{print $1}')
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export host_ip=${ip_address}
export RERANK_TYPE="tei"
export LOGFLAG=true
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export MILVUS_HOST=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"

View File

@@ -0,0 +1,33 @@
# DocIndexRetriever E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_on_xeon.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```
On AMD ROCm with vLLM:
```bash
bash test_compose_vllm_on_rocm.sh
```

View File

@@ -36,22 +36,7 @@ function build_docker_images() {
function start_services() {
echo "Starting Docker Services...."
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export MILVUS_HOST=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export host_ip=${ip_address}
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export RERANK_TYPE="tei"
export LOGFLAG=true
source ./set_env.sh
# Start Docker Containers
docker compose -f compose_milvus.yaml up -d

View File

@@ -35,22 +35,7 @@ function build_docker_images() {
function start_services() {
echo "Starting Docker Services...."
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export MILVUS_HOST=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export host_ip=${ip_address}
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export RERANK_TYPE="tei"
export LOGFLAG=true
source ./set_env.sh
# Start Docker Containers
docker compose -f compose_milvus.yaml up -d

View File

@@ -34,20 +34,7 @@ function build_docker_images() {
function start_services() {
echo "Starting Docker Services...."
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export host_ip=${ip_address}
export RERANK_TYPE="tei"
export LOGFLAG=true
source ./set_env.sh
# Start Docker Containers
docker compose up -d

View File

@@ -34,22 +34,7 @@ function build_docker_images() {
function start_services() {
echo "Starting Docker Services...."
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export host_ip=${ip_address}
export RERANK_TYPE="tei"
export LOGFLAG=true
source ./set_env.sh
# Start Docker Containers
docker compose up -d

View File

@@ -239,13 +239,16 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-F "language=en" \
```
Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs.
### Query with audio and video
> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
> Audio and video can be passed as base64 strings or uploaded by providing a local file path.
Audio:
```bash
# Send base64 string
curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-H "Content-Type: application/json" \
-d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -257,11 +260,21 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-F "max_tokens=32" \
-F "language=en" \
-F "stream=True"
# Upload file
curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-H "Content-Type: multipart/form-data" \
-F "type=audio" \
-F "messages=" \
-F "files=@/path to your file (.mp3, .wav)" \
-F "max_tokens=32" \
-F "language=en"
```
Video:
```bash
# Send base64 string
curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-H "Content-Type: application/json" \
-d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -273,6 +286,15 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-F "max_tokens=32" \
-F "language=en" \
-F "stream=True"
# Upload file
curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
-H "Content-Type: multipart/form-data" \
-F "type=video" \
-F "messages=" \
-F "files=@/path to your file (.mp4)" \
-F "max_tokens=32" \
-F "language=en"
```
### Query with long context

View File

@@ -3,7 +3,7 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
export HOST_IP=''
export HOST_IP=${ip_address}
export DOCSUM_MAX_INPUT_TOKENS="2048"
export DOCSUM_MAX_TOTAL_TOKENS="4096"
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

View File

@@ -3,7 +3,7 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
export HOST_IP=''
export HOST_IP=${ip_address}
export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DOCSUM_MAX_INPUT_TOKENS=2048
export DOCSUM_MAX_TOTAL_TOKENS=4096

View File

@@ -21,35 +21,29 @@ This section describes how to quickly deploy and test the DocSum service manuall
6. [Test the Pipeline](#test-the-pipeline)
7. [Cleanup the Deployment](#cleanup-the-deployment)
### Access the Code
### Access the Code and Set Up Environment
Clone the GenAIExample repository and access the DocSum Intel Xeon platform Docker Compose files and supporting scripts:
```
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/DocSum/docker_compose/intel/cpu/xeon/
cd GenAIExamples/DocSum/docker_compose
source set_env.sh
cd intel/cpu/xeon/
```
Checkout a released version, such as v1.2:
NOTE: By default vLLM performs a "warmup" at startup to optimize its performance for the specified model and the underlying platform, which can take a long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`.
```
git checkout v1.2
Check out a released version, such as v1.3:
```bash
git checkout v1.3
```
### Generate a HuggingFace Access Token
Some HuggingFace resources, such as certain models, are only accessible with an access token. If you do not already have a HuggingFace access token, create an account by following the steps at [HuggingFace](https://huggingface.co/) and then generate a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
### Configure the Deployment Environment
To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory:
```
source ./set_env.sh
```
The _set_env.sh_ script will prompt for required and optional environment variables used to configure the DocSum services. If a value is not entered, the script will use a default value for the same. It will also generate a _.env_ file defining the desired configuration. Consult the section on [DocSum Service configuration](#docsum-service-configuration) for information on how service specific configuration parameters affect deployments.
### Deploy the Services Using Docker Compose
To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
@@ -78,13 +72,13 @@ Please refer to the table below to build different microservices from source:
After running docker compose, check if all the containers launched via docker compose have started:
```
```bash
docker ps -a
```
For the default deployment, the following 5 containers should have started:
```
```bash
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
748f577b3c78 opea/whisper:latest "python whisper_s…" 5 minutes ago Up About a minute 0.0.0.0:7066->7066/tcp, :::7066->7066/tcp docsum-xeon-whisper-server
4eq8b7034fd9 opea/docsum-gradio-ui:latest "docker-entrypoint.s…" 5 minutes ago Up About a minute 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp docsum-xeon-ui-server
@@ -109,7 +103,7 @@ curl -X POST http://${host_ip}:8888/v1/docsum \
To stop the containers associated with the deployment, execute the following command:
```
```bash
docker compose -f compose.yaml down
```
@@ -156,16 +150,19 @@ curl http://${host_ip}:8888/v1/docsum \
-F "messages=" \
-F "files=@/path to your file (.txt, .docx, .pdf)" \
-F "max_tokens=32" \
-F "language=en" \
-F "language=en"
```
Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs.
### Query with audio and video
> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
> Audio and video can be passed as base64 strings or uploaded by providing a local file path.
Audio:
```bash
# Send base64 string
curl -X POST http://${host_ip}:8888/v1/docsum \
-H "Content-Type: application/json" \
-d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -177,11 +174,21 @@ curl http://${host_ip}:8888/v1/docsum \
-F "max_tokens=32" \
-F "language=en" \
-F "stream=True"
# Upload file
curl http://${host_ip}:8888/v1/docsum \
-H "Content-Type: multipart/form-data" \
-F "type=audio" \
-F "messages=" \
-F "files=@/path to your file (.mp3, .wav)" \
-F "max_tokens=32" \
-F "language=en"
```
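To build the base64 payload from a local recording instead of pasting a pre-encoded string, a minimal sketch (assuming GNU `base64` and a file small enough for a command-line argument) is:
```bash
# Encode a local .wav to base64 and submit it as JSON; the path is a placeholder
AUDIO_B64=$(base64 -w 0 /path/to/your/file.wav)
curl -X POST http://${host_ip}:8888/v1/docsum \
    -H "Content-Type: application/json" \
    -d "{\"type\": \"audio\", \"messages\": \"${AUDIO_B64}\"}"
```
For larger recordings, the multipart upload shown above avoids shell argument-length limits.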
Video:
```bash
# Send base64 string
curl -X POST http://${host_ip}:8888/v1/docsum \
-H "Content-Type: application/json" \
-d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -193,6 +200,15 @@ curl http://${host_ip}:8888/v1/docsum \
-F "max_tokens=32" \
-F "language=en" \
-F "stream=True"
# Upload file
curl http://${host_ip}:8888/v1/docsum \
-H "Content-Type: multipart/form-data" \
-F "type=video" \
-F "messages=" \
-F "files=@/path to your file (.mp4)" \
-F "max_tokens=32" \
-F "language=en"
```
### Query with long context

View File

@@ -23,35 +23,29 @@ This section describes how to quickly deploy and test the DocSum service manuall
6. [Test the Pipeline](#test-the-pipeline)
7. [Cleanup the Deployment](#cleanup-the-deployment)
### Access the Code
### Access the Code and Set Up Environment
Clone the GenAIExample repository and access the ChatQnA Intel® Gaudi® platform Docker Compose files and supporting scripts:
Clone the GenAIExample repository and access the DocSum Intel® Gaudi® platform Docker Compose files and supporting scripts:
```
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/DocSum/docker_compose/intel/hpu/gaudi/
cd GenAIExamples/DocSum/docker_compose
source set_env.sh
cd intel/hpu/gaudi/
```
Checkout a released version, such as v1.2:
NOTE: By default vLLM performs a "warmup" at startup to optimize its performance for the specified model and the underlying platform, which can take a long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`.
```
git checkout v1.2
Check out a released version, such as v1.3:
```bash
git checkout v1.3
```
### Generate a HuggingFace Access Token
Some HuggingFace resources, such as certain models, are only accessible with an access token. If you do not already have a HuggingFace access token, create an account by following the steps at [HuggingFace](https://huggingface.co/) and then generate a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
### Configure the Deployment Environment
To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory:
```
source ./set_env.sh
```
The _set_env.sh_ script will prompt for required and optional environment variables used to configure the DocSum services. If a value is not entered, the script will use a default value for the same. It will also generate a _.env_ file defining the desired configuration. Consult the section on [DocSum Service configuration](#docsum-service-configuration) for information on how service specific configuration parameters affect deployments.
### Deploy the Services Using Docker Compose
To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
@@ -80,13 +74,13 @@ Please refer to the table below to build different microservices from source:
After running docker compose, check if all the containers launched via docker compose have started:
```
```bash
docker ps -a
```
For the default deployment, the following 5 containers should have started:
```
```bash
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
748f577b3c78 opea/whisper:latest "python whisper_s…" 5 minutes ago Up About a minute 0.0.0.0:7066->7066/tcp, :::7066->7066/tcp docsum-gaudi-whisper-server
4eq8b7034fd9 opea/docsum-gradio-ui:latest "docker-entrypoint.s…" 5 minutes ago Up About a minute 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp docsum-gaudi-ui-server
@@ -111,7 +105,7 @@ curl -X POST http://${host_ip}:8888/v1/docsum \
To stop the containers associated with the deployment, execute the following command:
```
```bash
docker compose -f compose.yaml down
```
@@ -161,13 +155,16 @@ curl http://${host_ip}:8888/v1/docsum \
-F "language=en" \
```
Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs.
### Query with audio and video
> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
> Audio and video can be passed as base64 strings or uploaded by providing a local file path.
Audio:
```bash
# Send base64 string
curl -X POST http://${host_ip}:8888/v1/docsum \
-H "Content-Type: application/json" \
-d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -179,11 +176,21 @@ curl http://${host_ip}:8888/v1/docsum \
-F "max_tokens=32" \
-F "language=en" \
-F "stream=True"
# Upload file
curl http://${host_ip}:8888/v1/docsum \
-H "Content-Type: multipart/form-data" \
-F "type=audio" \
-F "messages=" \
-F "files=@/path to your file (.mp3, .wav)" \
-F "max_tokens=32" \
-F "language=en"
```
Video:
```bash
# Send base64 string
curl -X POST http://${host_ip}:8888/v1/docsum \
-H "Content-Type: application/json" \
-d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -195,6 +202,15 @@ curl http://${host_ip}:8888/v1/docsum \
-F "max_tokens=32" \
-F "language=en" \
-F "stream=True"
# Upload file
curl http://${host_ip}:8888/v1/docsum \
-H "Content-Type: multipart/form-data" \
-F "type=video" \
-F "messages=" \
-F "files=@/path to your file (.mp4)" \
-F "max_tokens=32" \
-F "language=en"
```
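When `stream=True` is set, the response arrives as a stream of `data:` chunks terminated by `data: [DONE]` (judging by the test expectations elsewhere in this change set). A rough way to watch the tokens arrive from the command line is:
```bash
# -N disables curl's output buffering; sed strips the SSE "data: " prefix
# (assumes a sed that supports -u for unbuffered output, e.g. GNU sed)
curl -N http://${host_ip}:8888/v1/docsum \
    -H "Content-Type: multipart/form-data" \
    -F "type=text" \
    -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings." \
    -F "max_tokens=32" \
    -F "language=en" \
    -F "stream=True" | sed -u 's/^data: //'
```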
### Query with long context

View File

@@ -18,6 +18,7 @@ services:
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]

View File

@@ -6,11 +6,11 @@ pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null
export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
@@ -20,10 +20,17 @@ export MAX_TOTAL_TOKENS=2048
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" # OpeaDocSumTgi
export FRONTEND_SERVICE_PORT=5173
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048

View File

@@ -63,6 +63,20 @@ def read_pdf(file):
return docs
def encode_file_to_base64(file_path):
"""Encode the content of a file to a base64 string.
Args:
file_path (str): The path to the file to be encoded.
Returns:
str: The base64 encoded string of the file content.
"""
with open(file_path, "rb") as f:
base64_str = base64.b64encode(f.read()).decode("utf-8")
return base64_str
def video2audio(
video_base64: str,
) -> str:
@@ -163,7 +177,6 @@ class DocSumService:
async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
"""Accept pure text, or files .txt/.pdf.docx, audio/video base64 string."""
if "application/json" in request.headers.get("content-type"):
data = await request.json()
stream_opt = data.get("stream", True)
@@ -193,25 +206,24 @@ class DocSumService:
uid = str(uuid.uuid4())
file_path = f"/tmp/{uid}"
if data_type is not None and data_type in ["audio", "video"]:
raise ValueError(
"Audio and Video file uploads are not supported in docsum with curl request, \
please use the UI or pass base64 string of the content directly."
)
import aiofiles
else:
import aiofiles
async with aiofiles.open(file_path, "wb") as f:
await f.write(await file.read())
async with aiofiles.open(file_path, "wb") as f:
await f.write(await file.read())
if data_type == "text":
docs = read_text_from_file(file, file_path)
os.remove(file_path)
elif data_type in ["audio", "video"]:
docs = encode_file_to_base64(file_path)
else:
raise ValueError(f"Data type not recognized: {data_type}")
if isinstance(docs, list):
file_summaries.extend(docs)
else:
file_summaries.append(docs)
os.remove(file_path)
if isinstance(docs, list):
file_summaries.extend(docs)
else:
file_summaries.append(docs)
if file_summaries:
prompt = handle_message(chat_request.messages) + "\n".join(file_summaries)

View File

@@ -16,3 +16,150 @@ helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGI
export HFTOKEN="insert-your-huggingface-token-here"
helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
## Deploy on AMD ROCm using Helm charts from the binary Helm repository
### Creating working dirs
```bash
mkdir ~/docsum-k8s-install && cd ~/docsum-k8s-install
```
### Cloning repos
```bash
git clone https://github.com/opea-project/GenAIExamples.git
```
### Go to the installation directory
```bash
cd GenAIExamples/DocSum/kubernetes/helm
```
### Setting system variables
```bash
export HFTOKEN="your_huggingface_token"
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
```
### Setting variables in Values files
#### If ROCm vLLM used
```bash
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
```
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
- TENSOR_PARALLEL_SIZE - must match the number of GPUs used
- resources:
limits:
amd.com/gpu: "1" - replace "1" with the number of GPUs used
#### If ROCm TGI used
```bash
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
```
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
- extraCmdArgs: [ "--num-shard","1" ] - replace "1" with the number of GPUs used
- resources:
limits:
amd.com/gpu: "1" - replace "1" with the number of GPUs used
### Installing the Helm Chart
#### If ROCm vLLM used
```bash
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
--values rocm-values.yaml
```
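If you would rather not edit `rocm-values.yaml` in place, the GPU-related settings described above can be kept in a small extra values file and layered on top. A sketch, assuming a two-GPU node; the override file name is arbitrary and the keys mirror the `vllm` section of `rocm-values.yaml`:
```bash
cat > my-rocm-overrides.yaml <<'EOF'
# Hypothetical two-GPU override; adjust to the GPUs available on your node
vllm:
  env:
    HIP_VISIBLE_DEVICES: "0,1"
    TENSOR_PARALLEL_SIZE: "2"
  resources:
    limits:
      amd.com/gpu: "2"
EOF
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
    --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
    --values rocm-values.yaml \
    --values my-rocm-overrides.yaml
```
Later `--values` files take precedence, so only the overridden keys change.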
#### If ROCm TGI used
```bash
helm upgrade --install docsum oci://ghcr.io/opea-project/charts/docsum \
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
--values rocm-tgi-values.yaml
```
## Deploy on AMD ROCm using Helm charts from Git repositories
### Creating working dirs
```bash
mkdir ~/docsum-k8s-install && cd ~/docsum-k8s-install
```
### Cloning repos
```bash
git clone https://github.com/opea-project/GenAIExamples.git
git clone https://github.com/opea-project/GenAIInfra.git
```
### Go to the installation directory
```bash
cd GenAIExamples/DocSum/kubernetes/helm
```
### Setting system variables
```bash
export HFTOKEN="your_huggingface_token"
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
```
### Setting variables in Values files
#### If ROCm vLLM used
```bash
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
```
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
- TENSOR_PARALLEL_SIZE - must match the number of GPUs used
- resources:
limits:
amd.com/gpu: "1" - replace "1" with the number of GPUs used
#### If ROCm TGI used
```bash
nano ~/docsum-k8s-install/GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
```
- HIP_VISIBLE_DEVICES - this variable specifies the ID of the GPU that you want to use.
You can specify either one or several comma-separated ones - "0" or "0,1,2,3"
- extraCmdArgs: [ "--num-shard","1" ] - replace "1" with the number of GPUs used
- resources:
limits:
amd.com/gpu: "1" - replace "1" with the number of GPUs used
### Installing the Helm Chart
#### If ROCm vLLM used
```bash
cd ~/docsum-k8s-install/GenAIInfra/helm-charts
scripts/update_dependency.sh
helm dependency update docsum
helm upgrade --install docsum docsum \
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
--values ../../GenAIExamples/DocSum/kubernetes/helm/rocm-values.yaml
```
#### If ROCm TGI used
```bash
cd ~/docsum-k8s-install/GenAIInfra/helm-charts
scripts/update_dependency.sh
helm dependency update docsum
helm upgrade --install docsum docsum \
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
--values ../../GenAIExamples/DocSum/kubernetes/helm/rocm-tgi-values.yaml
```

View File

@@ -0,0 +1,45 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
tgi:
enabled: true
accelDevice: "rocm"
image:
repository: ghcr.io/huggingface/text-generation-inference
tag: "2.4.1-rocm"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
USE_FLASH_ATTENTION: "false"
FLASH_ATTENTION_RECOMPUTE: "false"
HIP_VISIBLE_DEVICES: "0"
MAX_BATCH_SIZE: "4"
extraCmdArgs: [ "--num-shard","1" ]
resources:
limits:
amd.com/gpu: "1"
requests:
cpu: 1
memory: 16Gi
securityContext:
readOnlyRootFilesystem: false
runAsNonRoot: false
runAsUser: 0
capabilities:
add:
- SYS_PTRACE
readinessProbe:
initialDelaySeconds: 60
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
startupProbe:
initialDelaySeconds: 60
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
llm-uservice:
DOCSUM_BACKEND: "TGI"
retryTimeoutSeconds: 720
vllm:
enabled: false

View File

@@ -0,0 +1,40 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
tgi:
enabled: false
llm-uservice:
DOCSUM_BACKEND: "vLLM"
retryTimeoutSeconds: 720
vllm:
enabled: true
accelDevice: "rocm"
image:
repository: opea/vllm-rocm
tag: latest
env:
HIP_VISIBLE_DEVICES: "0"
TENSOR_PARALLEL_SIZE: "1"
HF_HUB_DISABLE_PROGRESS_BARS: "1"
HF_HUB_ENABLE_HF_TRANSFER: "0"
VLLM_USE_TRITON_FLASH_ATTN: "0"
VLLM_WORKER_MULTIPROC_METHOD: "spawn"
PYTORCH_JIT: "0"
HF_HOME: "/data"
extraCmd:
command: [ "python3", "/workspace/api_server.py" ]
extraCmdArgs: [ "--swap-space", "16",
"--disable-log-requests",
"--dtype", "float16",
"--num-scheduler-steps", "1",
"--distributed-executor-backend", "mp" ]
resources:
limits:
amd.com/gpu: "1"
startupProbe:
failureThreshold: 180
securityContext:
readOnlyRootFilesystem: false
runAsNonRoot: false
runAsUser: 0

DocSum/tests/README.md Normal file
View File

@@ -0,0 +1,45 @@
# DocSum E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with vLLM:
```bash
bash test_compose_on_xeon.sh
```
On Intel Xeon with TGI:
```bash
bash test_compose_tgi_on_xeon.sh
```
On Intel Gaudi with vLLM:
```bash
bash test_compose_on_gaudi.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_tgi_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```
On AMD ROCm with vLLM:
```bash
bash test_compose_vllm_on_rocm.sh
```
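The scripts pick up `IMAGE_REPO`, `IMAGE_TAG`, and `model_cache` from the environment and fall back to defaults (such as the `latest` tag) otherwise, so a run against locally built images could, for example, look like:
```bash
# Illustrative values; point these at your own registry, tag, and model cache
export IMAGE_REPO=opea
export IMAGE_TAG=latest
export model_cache=/mnt/models
bash test_compose_on_xeon.sh
```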

View File

@@ -10,35 +10,22 @@ export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}')
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
source $WORKPATH/docker_compose/set_env.sh
export MODEL_CACHE=${model_cache:-"./data"}
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
# Get the root folder of the current script
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
@@ -63,8 +50,8 @@ function build_docker_images() {
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="docsum docsum-gradio-ui whisper llm-docsum vllm-gaudi"
@@ -250,6 +237,20 @@ function validate_megaservice_multimedia() {
"language=en" \
"stream=False"
echo ">>> Checking audio data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=audio" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.wav" \
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -271,6 +272,20 @@ function validate_megaservice_multimedia() {
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=video" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.mp4" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
function validate_megaservice_long_text() {

View File

@@ -14,21 +14,8 @@ export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
export HOST_IP=${ip_address}
export host_ip=${ip_address}
export DOCSUM_MAX_INPUT_TOKENS="2048"
export DOCSUM_MAX_TOTAL_TOKENS="4096"
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export DOCSUM_TGI_SERVICE_PORT="8008"
export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
export DOCSUM_HUGGINGFACEHUB_API_TOKEN=''
export DOCSUM_WHISPER_PORT="7066"
export ASR_SERVICE_HOST_IP="${HOST_IP}"
export DOCSUM_LLM_SERVER_PORT="9000"
export DOCSUM_BACKEND_SERVER_PORT="18072"
export DOCSUM_FRONTEND_PORT="18073"
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env.sh
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
@@ -129,7 +116,7 @@ function validate_microservices() {
# whisper microservice
ulimit -s 65536
validate_services \
"${host_ip}:${DOCSUM_WHISPER_PORT}/v1/asr" \
"${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \
'{"asr_result":"well"}' \
"whisper-service" \
"whisper-service" \
@@ -137,7 +124,7 @@ function validate_microservices() {
# tgi for llm service
validate_services \
"${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/generate" \
"${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}/generate" \
"generated_text" \
"docsum-tgi-service" \
"docsum-tgi-service" \
@@ -145,7 +132,7 @@ function validate_microservices() {
# llm microservice
validate_services \
"${host_ip}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
"text" \
"docsum-llm-server" \
"docsum-llm-server" \
@@ -158,7 +145,7 @@ function validate_megaservice() {
local DOCKER_NAME="docsum-backend-server"
local EXPECTED_RESULT="[DONE]"
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
local URL="${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
local URL="${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
local DATA_TYPE="type=text"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
@@ -188,7 +175,7 @@ function validate_megaservice_json() {
echo ""
echo ">>> Checking text data with Content-Type: application/json"
validate_services \
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"[DONE]" \
"docsum-backend-server" \
"docsum-backend-server" \
@@ -196,7 +183,7 @@ function validate_megaservice_json() {
echo ">>> Checking audio data"
validate_services \
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"[DONE]" \
"docsum-backend-server" \
"docsum-backend-server" \
@@ -204,7 +191,7 @@ function validate_megaservice_json() {
echo ">>> Checking video data"
validate_services \
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"[DONE]" \
"docsum-backend-server" \
"docsum-backend-server" \

View File

@@ -10,30 +10,18 @@ export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
source $WORKPATH/docker_compose/set_env.sh
export MODEL_CACHE=${model_cache:-"./data"}
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
# Get the root folder of the current script
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
@@ -58,7 +46,7 @@ function build_docker_images() {
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
cd ../
@@ -249,6 +237,20 @@ function validate_megaservice_multimedia() {
"language=en" \
"stream=False"
echo ">>> Checking audio data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=audio" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.wav" \
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -270,6 +272,20 @@ function validate_megaservice_multimedia() {
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=video" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.mp4" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
function validate_megaservice_long_text() {

View File

@@ -9,32 +9,20 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"}
export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
source $WORKPATH/docker_compose/set_env.sh
export MODEL_CACHE=${model_cache:-"./data"}
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
# Get the root folder of the current script
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
@@ -241,6 +229,20 @@ function validate_megaservice_multimedia() {
"language=en" \
"stream=False"
echo ">>> Checking audio data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=audio" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.wav" \
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -262,6 +264,20 @@ function validate_megaservice_multimedia() {
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=video" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.mp4" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
function validate_megaservice_long_text() {

View File

@@ -9,31 +9,20 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"}
export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
source $WORKPATH/docker_compose/set_env.sh
export MODEL_CACHE=${model_cache:-"./data"}
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
# Get the root folder of the current script
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
@@ -240,6 +229,20 @@ function validate_megaservice_multimedia() {
"language=en" \
"stream=False"
echo ">>> Checking audio data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=audio" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.wav" \
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -261,6 +264,20 @@ function validate_megaservice_multimedia() {
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=video" \
"messages=" \
"files=@$ROOT_FOLDER/data/test.mp4" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
function validate_megaservice_long_text() {

View File

@@ -16,21 +16,7 @@ WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
export host_ip=${ip_address}
export HOST_IP=${ip_address}
export EXTERNAL_HOST_IP=${ip_address}
export DOCSUM_HUGGINGFACEHUB_API_TOKEN="${HUGGINGFACEHUB_API_TOKEN}"
export DOCSUM_MAX_INPUT_TOKENS=2048
export DOCSUM_MAX_TOTAL_TOKENS=4096
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export DOCSUM_VLLM_SERVICE_PORT="8008"
export DOCSUM_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}"
export DOCSUM_WHISPER_PORT="7066"
export ASR_SERVICE_HOST_IP="${HOST_IP}"
export DOCSUM_LLM_SERVER_PORT="9000"
export DOCSUM_BACKEND_SERVER_PORT="18072"
export DOCSUM_FRONTEND_PORT="18073"
export BACKEND_SERVICE_ENDPOINT="http://${EXTERNAL_HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_vllm.sh
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
@@ -130,7 +116,7 @@ function validate_microservices() {
# whisper microservice
ulimit -s 65536
validate_services \
"${host_ip}:${DOCSUM_WHISPER_PORT}/v1/asr" \
"${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \
'{"asr_result":"well"}' \
"whisper-service" \
"whisper-service" \
@@ -138,7 +124,7 @@ function validate_microservices() {
# vLLM service
validate_services \
"${host_ip}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \
"${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \
"content" \
"docsum-vllm-service" \
"docsum-vllm-service" \
@@ -146,7 +132,7 @@ function validate_microservices() {
# llm microservice
validate_services \
"${host_ip}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
"text" \
"docsum-llm-server" \
"docsum-llm-server" \
@@ -159,7 +145,7 @@ function validate_megaservice() {
local DOCKER_NAME="docsum-backend-server"
local EXPECTED_RESULT="[DONE]"
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
local URL="${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
local URL="${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
local DATA_TYPE="type=text"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
@@ -189,7 +175,7 @@ function validate_megaservice_json() {
echo ""
echo ">>> Checking text data with Content-Type: application/json"
validate_services \
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"[DONE]" \
"docsum-backend-server" \
"docsum-backend-server" \
@@ -197,7 +183,7 @@ function validate_megaservice_json() {
echo ">>> Checking audio data"
validate_services \
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"[DONE]" \
"docsum-backend-server" \
"docsum-backend-server" \
@@ -205,7 +191,7 @@ function validate_megaservice_json() {
echo ">>> Checking video data"
validate_services \
"${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \
"[DONE]" \
"docsum-backend-server" \
"docsum-backend-server" \

View File

@@ -22,76 +22,12 @@ logger = logging.getLogger(__name__)
class DocSumUI:
def __init__(self):
"""Initialize the DocSumUI class with accepted file types, headers, and backend service endpoint."""
self.ACCEPTED_FILE_TYPES = ["pdf", "doc", "docx"]
self.ACCEPTED_TEXT_FILE_TYPES = [".pdf", ".doc", ".docx"]
self.ACCEPTED_AUDIO_FILE_TYPES = [".mp3", ".wav"]
self.ACCEPTED_VIDEO_FILE_TYPES = [".mp4"]
self.HEADERS = {"Content-Type": "application/json"}
self.BACKEND_SERVICE_ENDPOINT = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/docsum")
def encode_file_to_base64(self, file_path):
"""Encode the content of a file to a base64 string.
Args:
file_path (str): The path to the file to be encoded.
Returns:
str: The base64 encoded string of the file content.
"""
logger.info(">>> Encoding file to base64: %s", file_path)
with open(file_path, "rb") as f:
base64_str = base64.b64encode(f.read()).decode("utf-8")
return base64_str
def read_file(self, file):
"""Read and process the content of a file.
Args:
file (file-like object): The file to be read.
Returns:
str: The content of the file or an error message if the file type is unsupported.
"""
self.page_content = ""
self.pages = []
if file.name.endswith(".pdf"):
loader = PyPDFLoader(file)
elif file.name.endswith((".doc", ".docx")):
loader = Docx2txtLoader(file)
else:
msg = f"Unsupported file type '{file.name}'. Choose from {self.ACCEPTED_FILE_TYPES}"
logger.error(msg)
return msg
for page in loader.lazy_load():
self.page_content += page.page_content
return self.page_content
def read_audio_file(self, file):
"""Read and process the content of an audio file.
Args:
file (file-like object): The audio file to be read.
Returns:
str: The base64 encoded content of the audio file.
"""
logger.info(">>> Reading audio file: %s", file.name)
base64_str = self.encode_file_to_base64(file)
return base64_str
def read_video_file(self, file):
"""Read and process the content of a video file.
Args:
file (file-like object): The video file to be read.
Returns:
str: The base64 encoded content of the video file.
"""
logger.info(">>> Reading video file: %s", file.name)
base64_str = self.encode_file_to_base64(file)
return base64_str
def is_valid_url(self, url):
try:
result = urlparse(url)
@@ -128,78 +64,107 @@ class DocSumUI:
return self.page_content
def generate_summary(self, doc_content, document_type="text"):
def process_response(self, response):
if response.status_code == 200:
try:
# Check if the specific log path is in the response text
if "/logs/LLMChain/final_output" in response.text:
# Extract the relevant part of the response
temp = ast.literal_eval(
[
i.split("data: ")[1]
for i in response.text.split("\n\n")
if "/logs/LLMChain/final_output" in i
][0]
)["ops"]
# Find the final output value
final_output = [i["value"] for i in temp if i["path"] == "/logs/LLMChain/final_output"][0]
return final_output["text"]
else:
# Perform string replacements to clean the response text
cleaned_text = response.text
replacements = [
("'\n\ndata: b'", ""),
("data: b' ", ""),
("</s>'\n\ndata: [DONE]\n\n", ""),
("\n\ndata: b", ""),
("'\n\n", ""),
("'\n", ""),
('''\'"''', ""),
]
for old, new in replacements:
cleaned_text = cleaned_text.replace(old, new)
return cleaned_text
except (IndexError, KeyError, ValueError) as e:
# Handle potential errors during parsing
logger.error("Error parsing response: %s", e)
return response.text
def generate_summary(self, document, document_type="text"):
"""Generate a summary for the given document content.
Args:
doc_content (str): The content of the document.
document (str): The content or path of the document.
document_type (str): The type of the document (default is "text").
Returns:
str: The generated summary or an error message.
"""
logger.info(">>> BACKEND_SERVICE_ENDPOINT - %s", self.BACKEND_SERVICE_ENDPOINT)
data = {"max_tokens": 256, "type": document_type, "messages": doc_content}
data = {"max_tokens": 256, "type": document_type, "messages": ""}
try:
response = requests.post(
url=self.BACKEND_SERVICE_ENDPOINT,
headers=self.HEADERS,
data=json.dumps(data),
proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]},
)
if os.path.exists(document):
file_header = "text/plain"
file_ext = os.path.splitext(document)[-1]
if file_ext == ".pdf":
file_header = "application/pdf"
elif file_ext in [".doc", ".docx"]:
file_header = "application/octet-stream"
elif file_ext in self.ACCEPTED_AUDIO_FILE_TYPES + self.ACCEPTED_VIDEO_FILE_TYPES:
file_header = f"{document_type}/{file_ext[-3:]}"
files = {"files": (os.path.basename(document), open(document, "rb"), file_header)}
try:
response = requests.post(
url=self.BACKEND_SERVICE_ENDPOINT,
headers={},
files=files,
data=data,
proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]},
)
if response.status_code == 200:
try:
# Check if the specific log path is in the response text
if "/logs/LLMChain/final_output" in response.text:
# Extract the relevant part of the response
temp = ast.literal_eval(
[
i.split("data: ")[1]
for i in response.text.split("\n\n")
if "/logs/LLMChain/final_output" in i
][0]
)["ops"]
return self.process_response(response)
# Find the final output value
final_output = [i["value"] for i in temp if i["path"] == "/logs/LLMChain/final_output"][0]
return final_output["text"]
else:
# Perform string replacements to clean the response text
cleaned_text = response.text
replacements = [
("'\n\ndata: b'", ""),
("data: b' ", ""),
("</s>'\n\ndata: [DONE]\n\n", ""),
("\n\ndata: b", ""),
("'\n\n", ""),
("'\n", ""),
('''\'"''', ""),
]
for old, new in replacements:
cleaned_text = cleaned_text.replace(old, new)
return cleaned_text
except (IndexError, KeyError, ValueError) as e:
# Handle potential errors during parsing
logger.error("Error parsing response: %s", e)
return response.text
except requests.exceptions.RequestException as e:
logger.error("Request exception: %s", e)
return str(e)
except requests.exceptions.RequestException as e:
logger.error("Request exception: %s", e)
return str(e)
else:
data["messages"] = document
try:
response = requests.post(
url=self.BACKEND_SERVICE_ENDPOINT,
headers=self.HEADERS,
data=json.dumps(data),
proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]},
)
return self.process_response(response)
except requests.exceptions.RequestException as e:
logger.error("Request exception: %s", e)
return str(e)
return str(response.status_code)
def create_upload_ui(self, label, file_types, process_function, document_type="text"):
def create_upload_ui(self, label, file_types, document_type="text"):
"""Create a Gradio UI for file uploads.
Args:
label (str): The label for the upload button.
file_types (list): The list of accepted file types.
process_function (function): The function to process the uploaded file.
document_type (str): The document type (text, audio, or video). Default is text.
Returns:
gr.Blocks: The Gradio Blocks object representing the upload UI.
@@ -214,7 +179,7 @@ class DocSumUI:
label="Text Summary", placeholder="Summarized text will be displayed here"
)
upload_btn.upload(
lambda file: self.generate_summary(process_function(file), document_type=document_type),
lambda file: self.generate_summary(file, document_type=document_type),
upload_btn,
generated_text,
)
@@ -263,24 +228,21 @@ class DocSumUI:
# File Upload UI
file_ui = self.create_upload_ui(
label="Please upload a document (.pdf, .doc, .docx)",
file_types=[".pdf", ".doc", ".docx"],
process_function=self.read_file,
label=f"Please upload a document ({', '.join(self.ACCEPTED_TEXT_FILE_TYPES)})",
file_types=self.ACCEPTED_TEXT_FILE_TYPES,
)
# Audio Upload UI
audio_ui = self.create_upload_ui(
label="Please upload audio file (.wav, .mp3)",
file_types=[".wav", ".mp3"],
process_function=self.read_audio_file,
label=f"Please upload audio file ({', '.join(self.ACCEPTED_AUDIO_FILE_TYPES)})",
file_types=self.ACCEPTED_AUDIO_FILE_TYPES,
document_type="audio",
)
# Video Upload UI
video_ui = self.create_upload_ui(
label="Please upload Video file (.mp4)",
file_types=[".mp4"],
process_function=self.read_video_file,
label=f"Please upload video file ({', '.join(self.ACCEPTED_VIDEO_FILE_TYPES)})",
file_types=self.ACCEPTED_VIDEO_FILE_TYPES,
document_type="video",
)

View File

@@ -14,16 +14,19 @@ services:
image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
edgecraftrag-server:
build:
context: ../
dockerfile: ./Dockerfile.server
extends: edgecraftrag
image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
edgecraftrag-ui:
build:
context: ../
dockerfile: ./ui/docker/Dockerfile.ui
extends: edgecraftrag
image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
edgecraftrag-ui-gradio:
build:
context: ../
dockerfile: ./ui/docker/Dockerfile.gradio
extends: edgecraftrag
image: ${REGISTRY:-opea}/edgecraftrag-ui-gradio:${TAG:-latest}

View File

@@ -18,12 +18,10 @@ declare module 'vue' {
AConfigProvider: typeof import('ant-design-vue/es')['ConfigProvider']
ADescriptions: typeof import('ant-design-vue/es')['Descriptions']
ADescriptionsItem: typeof import('ant-design-vue/es')['DescriptionsItem']
ADivider: typeof import('ant-design-vue/es')['Divider']
ADrawer: typeof import('ant-design-vue/es')['Drawer']
AEmpty: typeof import('ant-design-vue/es')['Empty']
AForm: typeof import('ant-design-vue/es')['Form']
AFormItem: typeof import('ant-design-vue/es')['FormItem']
AImage: typeof import('ant-design-vue/es')['Image']
AInput: typeof import('ant-design-vue/es')['Input']
AInputNumber: typeof import('ant-design-vue/es')['InputNumber']
ALayout: typeof import('ant-design-vue/es')['Layout']
@@ -31,7 +29,6 @@ declare module 'vue' {
ALayoutHeader: typeof import('ant-design-vue/es')['LayoutHeader']
AModal: typeof import('ant-design-vue/es')['Modal']
APagination: typeof import('ant-design-vue/es')['Pagination']
APopover: typeof import('ant-design-vue/es')['Popover']
ARadio: typeof import('ant-design-vue/es')['Radio']
ARadioGroup: typeof import('ant-design-vue/es')['RadioGroup']
ARow: typeof import('ant-design-vue/es')['Row']

View File

@@ -12,13 +12,16 @@
"@vueuse/i18n": "^4.0.0-beta.12",
"ant-design-vue": "^4.0.0-rc.6",
"axios": "^1.7.9",
"clipboard": "^2.0.11",
"dayjs": "^1.11.13",
"echarts": "^5.5.1",
"event-source-polyfill": "^1.0.31",
"highlight.js": "^11.11.1",
"http": "^0.0.1-security",
"js-cookie": "^3.0.5",
"lodash": "^4.17.21",
"marked": "^15.0.6",
"pinia": "^2.3.0",
"pinia": "^3.0.2",
"pinia-plugin-persistedstate": "^4.2.0",
"qs": "^6.13.1",
"socket.io-client": "^4.8.1",

View File

@@ -59,9 +59,9 @@ function build_vllm_docker_image() {
git clone https://github.com/HabanaAI/vllm-fork.git
fi
cd ./vllm-fork
# VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_FORK_VER} &> /dev/null
docker build --no-cache -f Dockerfile.hpu -t $vllm_image --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
if [ $? -ne 0 ]; then
echo "$vllm_image failed"

View File

@@ -18,7 +18,7 @@ Quick Start Deployment Steps:
2. Run Docker Compose.
3. Consume the GraphRAG Service.
Note: If you do not have docker installed you can run this script to install docker : `bash docker_compose/install_docker.sh`
Note: If you do not have Docker installed, you can [install Docker](https://docs.docker.com/engine/install/) first.
### Quick Start: 1.Setup Environment Variable

View File

@@ -1,35 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Update the package index
sudo apt-get -y update
# Install prerequisites
sudo apt-get -y install ca-certificates curl --no-install-recommends --fix-missing
# Create the directory for the Docker GPG key
sudo install -m 0755 -d /etc/apt/keyrings
# Add Docker's official GPG key
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
# Set permissions for the GPG key
sudo chmod a+r /etc/apt/keyrings/docker.asc
# Add Docker repository to the sources list
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# Update the package index with Docker packages
sudo apt-get -y update
# Install Docker packages
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin --no-install-recommends --fix-missing
# add existing user
sudo usermod -aG docker $USER
# Optional: Verify that Docker is installed correctly
sudo docker --version

View File

@@ -10,6 +10,9 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TEI_EMBEDDER_PORT=11633
export LLM_ENDPOINT_PORT=11634
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
@@ -17,7 +20,6 @@ export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export OPENAI_LLM_MODEL="gpt-4o"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export NEO4J_PORT1=11631
export NEO4J_PORT2=11632
@@ -32,3 +34,4 @@ export MAX_TOTAL_TOKENS=8192
export DATA_PATH="/mnt/nvme2n1/hf_cache"
export DATAPREP_PORT=11103
export RETRIEVER_PORT=11635
export MEGA_SERVICE_PORT=8888

GraphRAG/tests/README.md Normal file
View File

@@ -0,0 +1,15 @@
# GraphRAG E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Gaudi:
```bash
bash test_compose_on_gaudi.sh
```

Some files were not shown because too many files have changed in this diff.