Update benchmark scripts (#1883)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1, 2, 4, 8]
@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
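For context, the `node` list in these configs drives a sweep: one deploy-and-benchmark round per node count. A minimal sketch of consuming such a section, assuming only the config shape shown above (the YAML literal and the loop are illustrative, not the repo's code):

```python
# Parse a deploy section like the ones above and sweep its node counts.
# Requires pyyaml, which is already in requirements.txt.
import yaml

config = yaml.safe_load(
    """
deploy:
  device: gaudi
  version: 1.3.0
  node: [1, 2, 4, 8]
"""
)

for node_count in config["deploy"]["node"]:
    # One deploy-and-benchmark round per entry in the node list.
    print(f"would deploy on {node_count} node(s) and run the benchmark")
```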
@@ -20,14 +20,10 @@ deploy:
     memory_capacity: "8000Mi"
   replicaCount: [1]
 
   teirerank:
     enabled: False
 
   llm:
     engine: vllm # or tgi
     model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-    replicaCount:
-      without_teirerank: [1] # When teirerank.enabled is False
+    replicaCount: [1]
     resources:
       enabled: False
       cards_per_instance: 1
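The hunk above flattens `llm.replicaCount` from a dict keyed by teirerank state to a plain list. A hypothetical helper (not from deploy.py; the `with_teirerank` key is an assumption for symmetry) shows how resolution logic could accept both shapes:

```python
# Hypothetical helper: accepts the old dict-shaped replicaCount
# ({"without_teirerank": [...]}) and the new flat list ([1]).
def resolve_replica_count(llm_config: dict, teirerank_enabled: bool) -> list:
    rc = llm_config.get("replicaCount", [1])
    if isinstance(rc, dict):  # old schema
        key = "with_teirerank" if teirerank_enabled else "without_teirerank"
        return rc.get(key, [1])
    return rc  # new schema: already a plain list


print(resolve_replica_count({"replicaCount": {"without_teirerank": [1]}}, False))  # [1]
print(resolve_replica_count({"replicaCount": [1]}, False))  # [1]
```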
@@ -78,7 +74,7 @@ benchmark:
 
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True
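The inline comment requires `dataset` to be an absolute path. A standalone illustration of that constraint (the guard itself is not part of the benchmark scripts; the path is the one from the diff):

```python
# Illustrates the "absolute path" requirement on the dataset field.
import os

dataset = "/home/sdp/pubmed_10.txt"
if not os.path.isabs(dataset):
    raise ValueError(f"dataset must be an absolute path, got {dataset!r}")
print(f"dataset path OK: {dataset}")
```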
@@ -1,4 +1,4 @@
-# ChatQnA Benchmarking
+# Deploy and Benchmark
 
 ## Purpose
@@ -8,6 +8,11 @@ We aim to run these benchmarks and share them with the OPEA community for three
 - To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
 - To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc.
 
+### Support Example List
+
+- ChatQnA
+- DocSum
+
 ## Table of Contents
 
 - [Prerequisites](#prerequisites)
@@ -68,6 +73,7 @@ Before running the benchmarks, ensure you have:
 ```bash
 pip install -r requirements.txt
 ```
+Note: the benchmark requires `opea-eval>=1.3`; if v1.3 has not been released yet, build `opea-eval` from [source](https://github.com/opea-project/GenAIEval).
 
 ## Data Preparation
deploy.py
@@ -224,6 +224,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
             "modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
         }
     }
+    os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "")
 
     # Configure components
     values = configure_node_selectors(values, node_selector or {}, deploy_config)
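Setting `os.environ["HF_TOKEN"]` in the parent process makes the token visible to any subprocess launched later (for example a `helm` invocation), since children inherit the parent's environment. A self-contained illustration of that mechanism, assuming nothing else about deploy.py:

```python
# The child process sees HF_TOKEN because it was set in this (parent)
# process before the spawn.
import os
import subprocess
import sys

os.environ["HF_TOKEN"] = "hf_dummy_value"  # placeholder, not a real token
subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ.get('HF_TOKEN'))"],
    check=True,
)
```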
@@ -338,17 +339,15 @@ def get_hw_values_file(deploy_config, chart_dir):
     version = deploy_config.get("version", "1.1.0")
 
     if os.path.isdir(chart_dir):
-        # Determine which values file to use based on version
-        if version in ["1.0.0", "1.1.0"]:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
-        else:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
-
+        hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
         if not os.path.exists(hw_values_file):
             print(f"Warning: {hw_values_file} not found")
-            hw_values_file = None
-        else:
-            print(f"Device-specific values file found: {hw_values_file}")
+            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
+            if not os.path.exists(hw_values_file):
+                print(f"Warning: {hw_values_file} not found")
+                print(f"Error: cannot find a valid values file for {device_type} with {llm_engine}")
+                sys.exit(1)
+        print(f"Device-specific values file found: {hw_values_file}")
     else:
         print(f"Error: Could not find directory for {chart_dir}")
         hw_values_file = None
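The rewritten lookup no longer keys off the version string: it prefers the engine-specific values file and falls back to the device-only file, exiting hard if neither exists. A condensed sketch of that selection order (function name and structure are illustrative, not the repo's code):

```python
# Engine-specific values file first, device-only file second, hard exit
# if neither exists.
import os
import sys


def pick_values_file(chart_dir: str, device_type: str, llm_engine: str) -> str:
    candidates = [
        os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml"),
        os.path.join(chart_dir, f"{device_type}-values.yaml"),
    ]
    for path in candidates:
        if os.path.exists(path):
            return path
        print(f"Warning: {path} not found")
    print(f"Error: no values file for {device_type} with {llm_engine}")
    sys.exit(1)
```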
@@ -54,7 +54,7 @@ def construct_deploy_config(deploy_config, target_node, batch_param_value=None,
 
     # First determine which llm replicaCount to use based on teirerank.enabled
     services = new_config.get("services", {})
-    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", False)
 
     # Process each service's configuration
     for service_name, service_config in services.items():
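The only change here is the fallback default on the chained `.get()` calls. It matters exactly when a config omits the `teirerank` section or leaves `enabled` unset, as a quick standalone check shows:

```python
# With no teirerank section present, the chained .get() falls through to
# the default, so flipping it from True to False changes behavior for
# configs that omit the section.
services = {}  # config with no "teirerank" block
old_behavior = services.get("teirerank", {}).get("enabled", True)   # -> True
new_behavior = services.get("teirerank", {}).get("enabled", False)  # -> False
print(old_behavior, new_behavior)
```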
@@ -1,7 +1,7 @@
 kubernetes
 locust
 numpy
-opea-eval>=1.2
+opea-eval>=1.3
 prometheus_client
 pytest
 pyyaml
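To confirm the installed package meets the new floor before a run, a stdlib-only check can help (the distribution name `opea-eval` is taken from the requirements line above):

```python
# Sanity check that the installed opea-eval is >= 1.3 using only stdlib.
from importlib.metadata import version

installed = version("opea-eval")
major, minor = (int(x) for x in installed.split(".")[:2])
assert (major, minor) >= (1, 3), f"opea-eval {installed} is older than 1.3"
print(f"opea-eval {installed} OK")
```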