#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# host_ip is detected automatically below; alternatively, set it by hand:
# export host_ip=<your external public IP>
export host_ip=$(hostname -I | awk '{print $1}')
# Supply your Hugging Face Hub API token in the environment before running this script:
# export HUGGINGFACEHUB_API_TOKEN=<token>
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
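# Note: Meta-Llama-3-8B-Instruct is a gated model on Hugging Face; request access
# on the model page and use a token with that access for the first model download.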
# vLLM serving parameters
export NUM_CARDS=1                  # number of accelerator cards allocated to vLLM
export BLOCK_SIZE=128               # KV-cache block size, in tokens
export MAX_NUM_SEQS=256             # maximum number of sequences batched per iteration
export MAX_SEQ_LEN_TO_CAPTURE=2048  # max sequence length covered by graph capture
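# For reference, these are typically forwarded to the vLLM server as flags
# (assumed wiring; consult the compose file shipped with this example):
#   --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} \
#   --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}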
# Host IPs and ports for the mega-service and its micro-services
export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006
# Endpoint exposed by the AudioQnA backend (mega-service)
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
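
# Sanity check (a minimal sketch, kept commented out so sourcing this script has
# no side effects): once the AudioQnA stack is up via docker compose, the backend
# should answer on the endpoint above. The payload follows this example's API,
# base64-encoded audio in and base64-encoded audio out; "<base64-wav>" below is a
# placeholder for your own encoded clip, not a real value.
# curl ${BACKEND_SERVICE_ENDPOINT} \
#   -X POST \
#   -H "Content-Type: application/json" \
#   -d '{"audio": "<base64-wav>", "max_tokens": 64}'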