[AudioQnA] Enable vLLM and set it as default LLM serving (#1657)

Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Wang, Kai Lawrence authored on 2025-03-14 09:56:33 +08:00, committed by GitHub
parent 35c5cf5de8 | commit 8fe19291c8
16 changed files with 747 additions and 99 deletions


@@ -17,7 +17,7 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
 GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
-LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")


 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
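
For context, a minimal sketch (not part of this commit) of how the environment-driven defaults above can be exercised: vLLM serves an OpenAI-compatible API, so the megaservice's LLM settings map directly onto a /v1/chat/completions request. The requests usage and the sample prompt are illustrative assumptions, not code from this repository.

import os

import requests

# Defaults mirror the os.getenv calls in the diff above; point
# LLM_SERVER_HOST_IP at the machine actually running vLLM.
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")

# vLLM's OpenAI-compatible server accepts standard chat-completion
# payloads; the model name must match the one vLLM was launched with.
resp = requests.post(
    f"http://{LLM_SERVER_HOST_IP}:{LLM_SERVER_PORT}/v1/chat/completions",
    json={
        "model": LLM_MODEL_ID,
        "messages": [{"role": "user", "content": "What is OPEA?"}],
        "max_tokens": 64,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])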