[AudioQnA] Fix the LLM model field for inputs alignment (#1611)

Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2025-03-05 22:15:07 +08:00
parent 8a5ad1fc72
commit 2dfcfa0436
5 changed files with 7 additions and 2 deletions
--- a/AudioQnA/audioqna.py
+++ b/AudioQnA/audioqna.py
@@ -16,13 +16,14 @@ SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
 SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")


 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    if self.services[cur_node].service_type == ServiceType.LLM:
        # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
        next_inputs = {}
-        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
+        next_inputs["model"] = LLM_MODEL_ID
        next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
--- a/AudioQnA/audioqna_multilang.py
+++ b/AudioQnA/audioqna_multilang.py
@@ -17,6 +17,7 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
 GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")


 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -24,7 +25,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
    if self.services[cur_node].service_type == ServiceType.LLM:
        # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
        next_inputs = {}
-        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
+        next_inputs["model"] = LLM_MODEL_ID
        next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
--- a/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -69,6 +69,7 @@ services:
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -61,6 +61,7 @@ services:
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -82,6 +82,7 @@ services:
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host