Enable vllm for DocSum (#1716)
Set vLLM as the default LLM serving backend, and add the related Docker Compose files, READMEs, and test scripts. Fixes issue #1436.
Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
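In practice, the switch means the default DocSum deployment is brought up against a vLLM serving container, with TGI presumably kept as a separate compose variant. A minimal deployment sketch under that assumption (the compose file names are assumptions from the usual GenAIExamples layout, and the working directory is inferred from the pushd "../../" in the hunk below, not confirmed by this commit):

# Hypothetical deployment sketch; file names and paths are assumptions.
cd DocSum/docker_compose                    # assumed location of set_env.sh
source set_env.sh                           # exports the variables changed in this commit
docker compose -f compose.yaml up -d        # assumed default compose file, now vLLM-backed
# docker compose -f compose_tgi.yaml up -d  # assumed TGI variant kept alongside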
@@ -6,18 +6,21 @@ pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null

export no_proxy="${no_proxy},${host_ip}"
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048

export no_proxy="${no_proxy},${host_ip}"
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"

export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"

export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
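With those variables exported, a quick way to check the resulting stack would be to hit the serving endpoint and the gateway. A hedged sketch, assuming set_env.sh has been sourced and the containers are running (the gateway payload fields are assumptions from typical DocSum examples, not part of this diff):

# Hypothetical verification sketch; run after sourcing set_env.sh and starting the stack.
# vLLM exposes an OpenAI-compatible API, so listing models confirms the server is ready:
curl "${LLM_ENDPOINT}/v1/models"

# Assumed request shape for the DocSum gateway; check the example's README for the exact API:
curl -X POST "${BACKEND_SERVICE_ENDPOINT}" \
  -H "Content-Type: application/json" \
  -d '{"type": "text", "messages": "Paste the document text to summarize here."}'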
||||