Remote inference support for examples in Productivity suite (#1818)

commit c63e2cd067 (parent c793dd0b51)
Author: sri-intel
Date: 2025-04-18 02:36:57 -04:00
Committed via GitHub
Signed-off-by: Srinarayan Srikanthan <srinarayan.srikanthan@intel.com>

4 changed files with 279 additions and 0 deletions


@@ -58,6 +58,7 @@ RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
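
Note: defaulting to None rather than an empty string lets downstream code distinguish "no key configured" from "key set", so unauthenticated local endpoints keep working unchanged. A minimal sketch of that pattern (the auth_headers helper is illustrative, not part of this commit):

```python
import os

# Stays None when the variable is unset, so local endpoints that
# need no authentication are unaffected.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)

def auth_headers(api_key):
    # Hypothetical helper (not from this change): attach a Bearer
    # token only when a key was actually provided.
    return {"Authorization": f"Bearer {api_key}"} if api_key else {}
```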
@@ -239,6 +240,7 @@ class ChatQnAService:
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
+api_key=OPENAI_API_KEY,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
@@ -272,6 +274,7 @@ class ChatQnAService:
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
+api_key=OPENAI_API_KEY,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
@@ -317,6 +320,7 @@ class ChatQnAService:
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
+api_key=OPENAI_API_KEY,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
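
The three hunks above thread the same api_key into each remote LLM MicroService definition. End to end, what this enables is an OpenAI-compatible chat-completions request authenticated with a Bearer token; a minimal standalone sketch under that assumption (plain requests, not the actual GenAIComps client code):

```python
import os
import requests

# Same environment contract as the changed file.
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)

headers = {"Content-Type": "application/json"}
if OPENAI_API_KEY:
    # Remote OpenAI-compatible endpoints expect a Bearer token.
    headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"

# Scheme is assumed here; a hosted endpoint would typically be https.
resp = requests.post(
    f"http://{LLM_SERVER_HOST_IP}:{LLM_SERVER_PORT}/v1/chat/completions",
    headers=headers,
    json={
        "model": LLM_MODEL,
        "messages": [{"role": "user", "content": "Hello"}],
    },
    timeout=60,
)
print(resp.json())
```

When OPENAI_API_KEY is unset, no Authorization header is sent and the request degrades gracefully to the pre-existing unauthenticated behavior.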