Enable vllm for DocSum (#1716)

Set vLLM as the default LLM serving backend, and add the related Docker Compose files, READMEs, and test scripts.

Fix issue #1436

Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Letong Han committed 2025-03-28 17:15:01 +08:00 (committed by GitHub)
commit d4dcbd18ef, parent 87baeb833d
12 changed files with 1403 additions and 317 deletions
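For orientation, a minimal bring-up sketch for the reworked deployment follows; the platform directory, the default compose file name, and the host_ip detection are assumptions for illustration, not taken from this commit.

# Sketch only: bring up DocSum with the vLLM-backed compose files added in this PR.
# The directory and compose file name below are assumed, not confirmed by this diff.
cd DocSum/docker_compose/intel/cpu/xeon
export host_ip=$(hostname -I | awk '{print $1}')   # host IP consumed by set_env.sh
source ./set_env.sh                                # exports shown in the diff below
docker compose up -d                               # vLLM is now the default LLM backend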


@@ -6,18 +6,21 @@ pushd "../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
 export no_proxy="${no_proxy},${host_ip}"
-export LLM_ENDPOINT_PORT=8008
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export MAX_INPUT_TOKENS=1024
 export MAX_TOTAL_TOKENS=2048
-export no_proxy="${no_proxy},${host_ip}"
-export LLM_PORT=9000
-export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
-export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
+export BACKEND_SERVICE_PORT=8888
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
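As a quick sanity check (illustrative, not part of the commit), sourcing the updated script should yield endpoints derived from the new port variables rather than hard-coded values:

# Hypothetical verification; host_ip must be exported before sourcing set_env.sh.
export host_ip=$(hostname -I | awk '{print $1}')
source ./set_env.sh
echo "LLM endpoint:    ${LLM_ENDPOINT}"             # expected: http://<host_ip>:8008
echo "DocSum backend:  ${BACKEND_SERVICE_ENDPOINT}" # expected: http://<host_ip>:8888/v1/docsum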