Update GraphRAG to be compatible with latest component changes (#1427)
- Updated ENV VARS to align with recent changes in neo4j dataprep and retriever.
- Upgraded the tgi-gaudi image version.

Related to GenAIComps repo issue #1025 (opea-project/GenAIComps#1025)
Original PR: #1384
Original contributor: @rbrugaro

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
@@ -60,6 +60,7 @@ services:
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN: false
runtime: habana
cap_add:
- SYS_NICE
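These flags tune the upgraded tgi-gaudi image (HPU graphs, flash attention, EOS handling). Before wiring up the rest of the stack, the service can be smoke-tested on its own through the OpenAI-compatible route the megaservice now uses. A minimal sketch, assuming TGI is reachable at `TGI_LLM_ENDPOINT` (`http://${host_ip}:6005` in `set_env.sh` below); the localhost fallback and the prompt are placeholders:

```python
# Quick smoke test for the tgi-gaudi service (illustrative, not part of this PR).
import os

import requests

tgi = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:6005")
payload = {
    "model": "tgi",  # TGI serves a single model and does not validate this field
    "messages": [{"role": "user", "content": "Reply with a single short sentence."}],
    "max_tokens": 32,
}
resp = requests.post(f"{tgi}/v1/chat/completions", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```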
@@ -93,6 +94,7 @@ services:
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
retriever-neo4j-llamaindex:
@@ -122,6 +124,7 @@ services:
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
restart: unless-stopped
@@ -144,6 +147,7 @@ services:
- RETRIEVER_SERVICE_PORT=7000
- LLM_SERVER_HOST_IP=tgi-gaudi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
@@ -12,7 +12,7 @@ popd > /dev/null
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export OPENAI_LLM_MODEL="gpt-4o"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
@@ -21,3 +21,5 @@ export NEO4J_USERNAME=neo4j
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export LOGFLAG=True
export RETRIEVER_SERVICE_PORT=80
export LLM_SERVER_PORT=80
export MAX_OUTPUT_TOKENS=1024
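compose.yaml now expects `LLM_SERVER_PORT` and `MAX_OUTPUT_TOKENS` in addition to the earlier variables, so a stale shell environment is an easy way to end up with empty substitutions. A small, illustrative pre-flight check to run after `source set_env.sh` (variable names are taken from the hunks above; the script itself is not part of the PR):

```python
# Fail fast if an env var referenced by compose.yaml is missing or empty.
import os
import sys

required = [
    "EMBEDDING_MODEL_ID",
    "LLM_MODEL_ID",
    "TEI_EMBEDDING_ENDPOINT",
    "TGI_LLM_ENDPOINT",
    "RETRIEVER_SERVICE_PORT",
    "LLM_SERVER_PORT",
    "MAX_OUTPUT_TOKENS",
    "LOGFLAG",
]

missing = [name for name in required if not os.getenv(name)]
if missing:
    sys.exit("missing exports: " + ", ".join(missing))
print("all required variables are exported")
```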
@@ -52,6 +52,7 @@ RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
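The new `LLM_MODEL_ID` constant sits next to the existing host/port settings; in compose, `LLM_SERVER_HOST_IP` is set to the service name `tgi-gaudi-service`, so the gateway's LLM target resolves via Docker DNS. A sketch of how these values combine (defaults here are taken from the hunks, not the project code):

```python
# How the constants above combine into the in-network LLM endpoint (sketch).
import os

LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "tgi-gaudi-service")  # compose.yaml sets the service name
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")

# align_inputs() below converts requests to the OpenAI /v1/chat/completions format
llm_url = f"http://{LLM_SERVER_HOST_IP}:{LLM_SERVER_PORT}/v1/chat/completions"
print(llm_url, "->", LLM_MODEL_ID)
```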
@@ -60,7 +61,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
-next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
+next_inputs["model"] = LLM_MODEL_ID
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
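With this change the gateway forwards the real model id rather than the hard-coded `tgi` placeholder, so the body that reaches the LLM server is a plain chat-completions request. Roughly (illustrative values drawn from this PR's env settings; the question text and `top_p` are placeholders):

```python
# Shape of the unified request produced by align_inputs() after this change (sketch).
llm_request = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",  # LLM_MODEL_ID, no longer "tgi"
    "messages": [{"role": "user", "content": "Summarize the key entities in the ingested documents."}],
    "max_tokens": 1024,  # llm_parameters_dict["max_tokens"] / MAX_OUTPUT_TOKENS
    "top_p": 0.95,       # llm_parameters_dict["top_p"] (example value)
}
print(llm_request)
```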
@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

-set -xe
+set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -51,6 +51,8 @@ function start_services() {
export TGI_LLM_ENDPOINT="http://${ip_address}:6005"
export host_ip=${ip_address}
export LOGFLAG=true
export MAX_OUTPUT_TOKENS="1024"
unset OPENAI_API_KEY

# Start Docker Containers
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
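On Gaudi the TGI container can take several minutes to load the model and warm up, so validations run immediately after `docker compose up` may fail spuriously. A hedged readiness-poll sketch against TGI's health route (endpoint taken from the exports above; the timeout is an arbitrary choice, not a value from the PR):

```python
# Poll the tgi-gaudi service until it reports healthy (illustrative helper).
import os
import time

import requests

health_url = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:6005") + "/health"
deadline = time.time() + 20 * 60  # generous upper bound for model load + warmup

while time.time() < deadline:
    try:
        if requests.get(health_url, timeout=5).status_code == 200:
            print("tgi-gaudi-service is ready")
            break
    except requests.RequestException:
        pass
    time.sleep(10)
else:
    raise SystemExit("tgi-gaudi-service did not become ready in time")
```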
@@ -76,6 +78,7 @@ function validate_service() {
if [[ $SERVICE_NAME == *"extract_graph_neo4j"* ]]; then
cd $LOG_PATH
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL")
echo $HTTP_RESPONSE
elif [[ $SERVICE_NAME == *"neo4j-apoc"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL")
else
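The same ingest call the test issues with curl can be reproduced with `requests` when debugging dataprep locally. A sketch; the endpoint and file name mirror the test above, everything else is illustrative:

```python
# Python equivalent of the test's multipart upload to the dataprep ingest route.
import os

import requests

url = os.getenv("DATAPREP_SERVICE_ENDPOINT", "http://localhost:5000/v1/dataprep/ingest")

with open("dataprep_file.txt", "rb") as f:
    resp = requests.post(url, files={"files": f}, timeout=600)

print(resp.status_code, resp.text)
```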
@@ -211,7 +214,7 @@ function main() {
echo "Mega service start duration is $duration s"

if [ "${mode}" == "perf" ]; then
-python3 $WORKPATH/tests/chatqna_benchmark.py
+echo "not implemented"
elif [ "${mode}" == "" ]; then
validate_microservices
validate_megaservice
@@ -21,7 +21,7 @@ export default defineConfig({
* Maximum time expect() should wait for the condition to be met.
* For example in `await expect(locator).toHaveText();`
*/
-timeout: 30000,
+timeout: 300000,
},
/* Run tests in files in parallel */
fullyParallel: true,