Update GraphRAG to be compatible with latest component changes (#1427)
- Updated ENV VARS to align with recent changes in neo4j dataprep and retriever.
- Upgraded the tgi-gaudi image version.

Related to GenAIComps repo issue #1025 (opea-project/GenAIComps#1025)
Original PR: #1384
Original contributor: @rbrugaro

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
@@ -60,6 +60,7 @@ services:
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN: false
runtime: habana
cap_add:
- SYS_NICE
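These flags tune the upgraded tgi-gaudi image (HPU graphs, flash attention, EOS handling). Before wiring up the rest of the stack, the service can be smoke-tested on its own through the OpenAI-compatible route the megaservice now uses. A minimal sketch, assuming TGI is reachable at `TGI_LLM_ENDPOINT` (`http://${host_ip}:6005` in `set_env.sh` below); the localhost fallback and the prompt are placeholders:

```python
# Quick smoke test for the tgi-gaudi service (illustrative, not part of this PR).
import os

import requests

tgi = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:6005")
payload = {
    "model": "tgi",  # TGI serves a single model and does not validate this field
    "messages": [{"role": "user", "content": "Reply with a single short sentence."}],
    "max_tokens": 32,
}
resp = requests.post(f"{tgi}/v1/chat/completions", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```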
@@ -93,6 +94,7 @@ services:
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
retriever-neo4j-llamaindex:
@@ -122,6 +124,7 @@ services:
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
restart: unless-stopped
@@ -144,6 +147,7 @@ services:
- RETRIEVER_SERVICE_PORT=7000
- LLM_SERVER_HOST_IP=tgi-gaudi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
@@ -12,7 +12,7 @@ popd > /dev/null
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export OPENAI_LLM_MODEL="gpt-4o"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
@@ -21,3 +21,5 @@ export NEO4J_USERNAME=neo4j
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export LOGFLAG=True
export RETRIEVER_SERVICE_PORT=80
export LLM_SERVER_PORT=80
export MAX_OUTPUT_TOKENS=1024
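compose.yaml now expects `LLM_SERVER_PORT` and `MAX_OUTPUT_TOKENS` in addition to the earlier variables, so a stale shell environment is an easy way to end up with empty substitutions. A small, illustrative pre-flight check to run after `source set_env.sh` (variable names are taken from the hunks above; the script itself is not part of the PR):

```python
# Fail fast if an env var referenced by compose.yaml is missing or empty.
import os
import sys

required = [
    "EMBEDDING_MODEL_ID",
    "LLM_MODEL_ID",
    "TEI_EMBEDDING_ENDPOINT",
    "TGI_LLM_ENDPOINT",
    "RETRIEVER_SERVICE_PORT",
    "LLM_SERVER_PORT",
    "MAX_OUTPUT_TOKENS",
    "LOGFLAG",
]

missing = [name for name in required if not os.getenv(name)]
if missing:
    sys.exit("missing exports: " + ", ".join(missing))
print("all required variables are exported")
```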
@@ -52,6 +52,7 @@ RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
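The new `LLM_MODEL_ID` constant sits next to the existing host/port settings; in compose, `LLM_SERVER_HOST_IP` is set to the service name `tgi-gaudi-service`, so the gateway's LLM target resolves via Docker DNS. A sketch of how these values combine (defaults here are taken from the hunks, not the project code):

```python
# How the constants above combine into the in-network LLM endpoint (sketch).
import os

LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "tgi-gaudi-service")  # compose.yaml sets the service name
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")

# align_inputs() below converts requests to the OpenAI /v1/chat/completions format
llm_url = f"http://{LLM_SERVER_HOST_IP}:{LLM_SERVER_PORT}/v1/chat/completions"
print(llm_url, "->", LLM_MODEL_ID)
```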
@@ -60,7 +61,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
-next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
+next_inputs["model"] = LLM_MODEL_ID
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
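With this change the gateway forwards the real model id rather than the hard-coded `tgi` placeholder, so the body that reaches the LLM server is a plain chat-completions request. Roughly (illustrative values drawn from this PR's env settings; the question text and `top_p` are placeholders):

```python
# Shape of the unified request produced by align_inputs() after this change (sketch).
llm_request = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",  # LLM_MODEL_ID, no longer "tgi"
    "messages": [{"role": "user", "content": "Summarize the key entities in the ingested documents."}],
    "max_tokens": 1024,  # llm_parameters_dict["max_tokens"] / MAX_OUTPUT_TOKENS
    "top_p": 0.95,       # llm_parameters_dict["top_p"] (example value)
}
print(llm_request)
```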
@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

-set -xe
+set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -51,6 +51,8 @@ function start_services() {
export TGI_LLM_ENDPOINT="http://${ip_address}:6005"
export host_ip=${ip_address}
export LOGFLAG=true
export MAX_OUTPUT_TOKENS="1024"
unset OPENAI_API_KEY

# Start Docker Containers
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
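On Gaudi the TGI container can take several minutes to load the model and warm up, so validations run immediately after `docker compose up` may fail spuriously. A hedged readiness-poll sketch against TGI's health route (endpoint taken from the exports above; the timeout is an arbitrary choice, not a value from the PR):

```python
# Poll the tgi-gaudi service until it reports healthy (illustrative helper).
import os
import time

import requests

health_url = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:6005") + "/health"
deadline = time.time() + 20 * 60  # generous upper bound for model load + warmup

while time.time() < deadline:
    try:
        if requests.get(health_url, timeout=5).status_code == 200:
            print("tgi-gaudi-service is ready")
            break
    except requests.RequestException:
        pass
    time.sleep(10)
else:
    raise SystemExit("tgi-gaudi-service did not become ready in time")
```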
@@ -76,6 +78,7 @@ function validate_service() {
if [[ $SERVICE_NAME == *"extract_graph_neo4j"* ]]; then
cd $LOG_PATH
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL")
echo $HTTP_RESPONSE
elif [[ $SERVICE_NAME == *"neo4j-apoc"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL")
else
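The same ingest call the test issues with curl can be reproduced with `requests` when debugging dataprep locally. A sketch; the endpoint and file name mirror the test above, everything else is illustrative:

```python
# Python equivalent of the test's multipart upload to the dataprep ingest route.
import os

import requests

url = os.getenv("DATAPREP_SERVICE_ENDPOINT", "http://localhost:5000/v1/dataprep/ingest")

with open("dataprep_file.txt", "rb") as f:
    resp = requests.post(url, files={"files": f}, timeout=600)

print(resp.status_code, resp.text)
```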
@@ -211,7 +214,7 @@ function main() {
echo "Mega service start duration is $duration s"

if [ "${mode}" == "perf" ]; then
-python3 $WORKPATH/tests/chatqna_benchmark.py
+echo "not implemented"
elif [ "${mode}" == "" ]; then
validate_microservices
validate_megaservice
@@ -21,7 +21,7 @@ export default defineConfig({
* Maximum time expect() should wait for the condition to be met.
* For example in `await expect(locator).toHaveText();`
*/
-timeout: 30000,
+timeout: 300000,
},
/* Run tests in files in parallel */
fullyParallel: true,