Fix benchmark scripts (#1517)

- Align benchmark default config: 1. Update default helm charts version. 2. Add `# mandatory` comment. 3. Update default model ID for LLM. - Fix deploy issue: 1. Support different `replicaCount` for w/ w/o rerank test. 2. Add `max_num_seqs` for vllm. 3. Add resource setting for tune mode. - Fix Benchmark issue: 1. Update `user_queries` and `concurrency` setting. 2. Remove invalid parameters. 3. Fix `dataset` and `prompt` setting. And dataset ingest into db. 5. Fix the benchmark hang issue with large user queries. Update `"processes": 16` will fix this issue. 6. Update the eval_path setting logical. - Optimize benchmark readme. - Optimize the log path to make the logs more readable. Signed-off-by: chensuyue <suyue.chen@intel.com> Signed-off-by: Cathy Zhang <cathy.zhang@intel.com> Signed-off-by: letonghan <letong.han@intel.com>
2025-02-28 10:30:54 +08:00
parent 78f8ae524d
commit 3d8009aa91
6 changed files with 641 additions and 207 deletions
--- a/deploy_and_benchmark.py
+++ b/deploy_and_benchmark.py
@@ -23,13 +23,14 @@ def read_yaml(file_path):
        return None


-def construct_deploy_config(deploy_config, target_node, max_batch_size=None):
-    """Construct a new deploy config based on the target node number and optional max_batch_size.
+def construct_deploy_config(deploy_config, target_node, batch_param_value=None, test_mode="oob"):
+    """Construct a new deploy config based on the target node number and optional batch parameter value.

    Args:
        deploy_config: Original deploy config dictionary
        target_node: Target node number to match in the node array
-        max_batch_size: Optional specific max_batch_size value to use
+        batch_param_value: Optional specific batch parameter value to use
+        test_mode: Test mode, either 'oob' or 'tune'

    Returns:
        A new deploy config with single values for node and instance_num
@@ -51,21 +52,79 @@ def construct_deploy_config(deploy_config, target_node, max_batch_size=None):
    # Set the single node value
    new_config["node"] = target_node

-    # Update instance_num for each service based on the same index
-    for service_name, service_config in new_config.get("services", {}).items():
-        if "replicaCount" in service_config:
-            instance_nums = service_config["replicaCount"]
-            if isinstance(instance_nums, list):
-                if len(instance_nums) != len(nodes):
-                    raise ValueError(
-                        f"instance_num array length ({len(instance_nums)}) for service {service_name} "
-                        f"doesn't match node array length ({len(nodes)})"
-                    )
-                service_config["replicaCount"] = instance_nums[node_index]
+    # First determine which llm replicaCount to use based on teirerank.enabled
+    services = new_config.get("services", {})
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)

-    # Update max_batch_size if specified
-    if max_batch_size is not None and "llm" in new_config["services"]:
-        new_config["services"]["llm"]["max_batch_size"] = max_batch_size
+    # Process each service's configuration
+    for service_name, service_config in services.items():
+        # Handle replicaCount
+        if "replicaCount" in service_config:
+            if service_name == "llm" and isinstance(service_config["replicaCount"], dict):
+                replica_counts = service_config["replicaCount"]
+                service_config["replicaCount"] = (
+                    replica_counts["with_teirerank"] if teirerank_enabled else replica_counts["without_teirerank"]
+                )
+
+            if isinstance(service_config["replicaCount"], list):
+                if len(service_config["replicaCount"]) < len(nodes):
+                    raise ValueError(
+                        f"replicaCount array length ({len(service_config['replicaCount'])}) for service {service_name} "
+                        f"smaller than node array length ({len(nodes)})"
+                    )
+                service_config["replicaCount"] = service_config["replicaCount"][node_index]
+
+        # Handle resources based on test_mode
+        if "resources" in service_config:
+            resources = service_config["resources"]
+            if test_mode == "tune" or resources.get("enabled", False):
+                # Keep resource configuration but remove enabled field
+                resources.pop("enabled", None)
+            else:
+                # Remove resource configuration in OOB mode when disabled
+                service_config.pop("resources")
+
+        # Handle model parameters for LLM service
+        if service_name == "llm" and "model_params" in service_config:
+            model_params = service_config["model_params"]
+            engine = service_config.get("engine", "tgi")
+
+            # Get engine-specific parameters
+            engine_params = model_params.get(engine, {})
+
+            # Handle batch parameters
+            if "batch_params" in engine_params:
+                batch_params = engine_params["batch_params"]
+                if test_mode == "tune" or batch_params.get("enabled", False):
+                    # Keep batch parameters configuration but remove enabled field
+                    batch_params.pop("enabled", None)
+
+                    # Update batch parameter value if specified
+                    if batch_param_value is not None:
+                        if engine == "tgi":
+                            batch_params["max_batch_size"] = str(batch_param_value)
+                        elif engine == "vllm":
+                            batch_params["max_num_seqs"] = str(batch_param_value)
+                else:
+                    engine_params.pop("batch_params")
+
+            # Handle token parameters
+            if "token_params" in engine_params:
+                token_params = engine_params["token_params"]
+                if test_mode == "tune" or token_params.get("enabled", False):
+                    # Keep token parameters configuration but remove enabled field
+                    token_params.pop("enabled", None)
+                else:
+                    # Remove token parameters in OOB mode when disabled
+                    engine_params.pop("token_params")
+
+            # Update model_params with engine-specific parameters only
+            model_params.clear()
+            model_params[engine] = engine_params
+
+            # Remove model_params if empty or if engine_params is empty
+            if not model_params or not engine_params:
+                service_config.pop("model_params")

    return new_config

@@ -84,13 +143,18 @@ def pull_helm_chart(chart_pull_url, version, chart_name):
    return untar_dir


-def main(yaml_file, target_node=None):
+def main(yaml_file, target_node=None, test_mode="oob"):
    """Main function to process deployment configuration.

    Args:
        yaml_file: Path to the YAML configuration file
        target_node: Optional target number of nodes to deploy. If not specified, will process all nodes.
+        test_mode: Test mode, either "oob" (out of box) or "tune". Defaults to "oob".
    """
+    if test_mode not in ["oob", "tune"]:
+        print("Error: test_mode must be either 'oob' or 'tune'")
+        return None
+
    config = read_yaml(yaml_file)
    if config is None:
        print("Failed to read YAML file.")
@@ -116,7 +180,7 @@ def main(yaml_file, target_node=None):

    # Pull the Helm chart
    chart_pull_url = f"oci://ghcr.io/opea-project/charts/{chart_name}"
-    version = deploy_config.get("version", "1.1.0")
+    version = deploy_config.get("version", "0-latest")
    chart_dir = pull_helm_chart(chart_pull_url, version, chart_name)
    if not chart_dir:
        return
@@ -140,20 +204,61 @@ def main(yaml_file, target_node=None):
                continue

            try:
-                # Process max_batch_sizes
-                max_batch_sizes = deploy_config.get("services", {}).get("llm", {}).get("max_batch_size", [])
-                if not isinstance(max_batch_sizes, list):
-                    max_batch_sizes = [max_batch_sizes]
+                # Process batch parameters based on engine type
+                services = deploy_config.get("services", {})
+                llm_config = services.get("llm", {})
+
+                if "model_params" in llm_config:
+                    model_params = llm_config["model_params"]
+                    engine = llm_config.get("engine", "tgi")
+
+                    # Get engine-specific parameters
+                    engine_params = model_params.get(engine, {})
+
+                    # Handle batch parameters
+                    batch_params = []
+                    if "batch_params" in engine_params:
+                        key = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+                        batch_params = engine_params["batch_params"].get(key, [])
+                        param_name = key
+
+                    if not isinstance(batch_params, list):
+                        batch_params = [batch_params]
+
+                    # Skip multiple iterations if batch parameter is empty
+                    if batch_params == [""] or not batch_params:
+                        batch_params = [None]
+                else:
+                    batch_params = [None]
+                    param_name = "batch_param"
+
+                # Get timeout and interval from deploy config for check-ready
+                timeout = deploy_config.get("timeout", 1000)  # default 1000s
+                interval = deploy_config.get("interval", 5)  # default 5s

                values_file_path = None
-                for i, max_batch_size in enumerate(max_batch_sizes):
-                    print(f"\nProcessing max_batch_size: {max_batch_size}")
+                # Create benchmark output directory
+                benchmark_dir = os.path.join(os.getcwd(), "benchmark_output")
+                os.makedirs(benchmark_dir, exist_ok=True)
+
+                for i, batch_param in enumerate(batch_params):
+                    print(f"\nProcessing {test_mode} mode {param_name}: {batch_param}")
+                    # Create subdirectory for this iteration with test mode in the name
+                    iteration_dir = os.path.join(
+                        benchmark_dir,
+                        f"benchmark_{test_mode}_node{node}_batch{batch_param if batch_param is not None else 'default'}",
+                    )
+                    os.makedirs(iteration_dir, exist_ok=True)

                    # Construct new deploy config
-                    new_deploy_config = construct_deploy_config(deploy_config, node, max_batch_size)
+                    new_deploy_config = construct_deploy_config(deploy_config, node, batch_param, test_mode)

                    # Write the new deploy config to a temporary file
-                    temp_config_file = f"temp_deploy_config_{node}_{max_batch_size}.yaml"
+                    temp_config_file = (
+                        f"temp_deploy_config_{node}.yaml"
+                        if batch_param is None
+                        else f"temp_deploy_config_{node}_{batch_param}.yaml"
+                    )
                    try:
                        with open(temp_config_file, "w") as f:
                            yaml.dump(new_deploy_config, f)
@@ -178,6 +283,8 @@ def main(yaml_file, target_node=None):
                            if match:
                                values_file_path = match.group(1)
                                print(f"Captured values_file_path: {values_file_path}")
+                                # Copy values file to iteration directory
+                                shutil.copy2(values_file_path, iteration_dir)
                            else:
                                print("values_file_path not found in the output")

@@ -198,12 +305,20 @@ def main(yaml_file, target_node=None):
                                values_file_path,
                                "--update-service",
                            ]
-                            result = subprocess.run(cmd, check=True)
+                            result = subprocess.run(cmd, check=True, capture_output=True, text=True)
                            if result.returncode != 0:
-                                print(
-                                    f"Update failed for {node} nodes configuration with max_batch_size {max_batch_size}"
-                                )
-                                break  # Skip remaining max_batch_sizes for this node
+                                print(f"Update failed for {node} nodes configuration with {param_name} {batch_param}")
+                                break  # Skip remaining {param_name} for this node
+
+                            # Update values_file_path from the output
+                            match = re.search(r"values_file_path: (\S+)", result.stdout)
+                            if match:
+                                values_file_path = match.group(1)
+                                print(f"Updated values_file_path: {values_file_path}")
+                                # Copy values file to iteration directory
+                                shutil.copy2(values_file_path, iteration_dir)
+                            else:
+                                print("values_file_path not found in the output")

                        # Wait for deployment to be ready
                        print("\nWaiting for deployment to be ready...")
@@ -215,26 +330,42 @@ def main(yaml_file, target_node=None):
                            "--namespace",
                            namespace,
                            "--check-ready",
+                            "--timeout",
+                            str(timeout),
+                            "--interval",
+                            str(interval),
                        ]
                        try:
-                            result = subprocess.run(cmd, check=True)
-                            print("Deployments are ready!")
+                            result = subprocess.run(
+                                cmd, check=False
+                            )  # Changed to check=False to handle return code manually
+                            if result.returncode == 0:
+                                print("Deployments are ready!")
+                                # Run benchmark only if deployment is ready
+                                run_benchmark(
+                                    benchmark_config=benchmark_config,
+                                    chart_name=chart_name,
+                                    namespace=namespace,
+                                    node_num=node,
+                                    llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""),
+                                    output_dir=iteration_dir,
+                                )
+                            else:
+                                print(
+                                    f"Deployments are not ready after timeout period during "
+                                    f"{'deployment' if i == 0 else 'update'} for {node} nodes. "
+                                    f"Skipping remaining iterations."
+                                )
+                                break  # Exit the batch parameter loop for current node
                        except subprocess.CalledProcessError as e:
-                            print(f"Deployments status failed with returncode: {e.returncode}")
-
-                        # Run benchmark
-                        run_benchmark(
-                            benchmark_config=benchmark_config,
-                            chart_name=chart_name,
-                            namespace=namespace,
-                            llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""),
-                        )
+                            print(f"Error while checking deployment status: {str(e)}")
+                            break  # Exit the batch parameter loop for current node

                    except Exception as e:
                        print(
-                            f"Error during {'deployment' if i == 0 else 'update'} for {node} nodes with max_batch_size {max_batch_size}: {str(e)}"
+                            f"Error during {'deployment' if i == 0 else 'update'} for {node} nodes with {param_name} {batch_param}: {str(e)}"
                        )
-                        break  # Skip remaining max_batch_sizes for this node
+                        break  # Skip remaining {param_name} for this node
                    finally:
                        # Clean up the temporary file
                        if os.path.exists(temp_config_file):
@@ -287,6 +418,7 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Deploy and benchmark with specific node configuration.")
    parser.add_argument("yaml_file", help="Path to the YAML configuration file")
    parser.add_argument("--target-node", type=int, help="Optional: Target number of nodes to deploy.", default=None)
+    parser.add_argument("--test-mode", type=str, help="Test mode, either 'oob' (out of box) or 'tune'.", default="oob")

    args = parser.parse_args()
-    main(args.yaml_file, args.target_node)
+    main(args.yaml_file, args.target_node, args.test_mode)