Update benchmark scripts (#1883)

Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
chen, suyue
2025-04-25 17:05:48 +08:00
committed by GitHub
parent 18b4f39f27
commit be5933ad85
6 changed files with 21 additions and 20 deletions

View File

@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1, 2, 4, 8]

View File

@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
@@ -20,14 +20,10 @@ deploy:
         memory_capacity: "8000Mi"
       replicaCount: [1]
-    teirerank:
-      enabled: False
     llm:
       engine: vllm # or tgi
       model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-      replicaCount:
-        without_teirerank: [1] # When teirerank.enabled is False
+      replicaCount: [1]
       resources:
         enabled: False
         cards_per_instance: 1
@@ -78,7 +74,7 @@ benchmark:
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True

View File

@@ -1,4 +1,4 @@
-# ChatQnA Benchmarking
+# Deploy and Benchmark
 ## Purpose
@@ -8,6 +8,11 @@ We aim to run these benchmarks and share them with the OPEA community for three
 - To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
 - To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc.
+### Support Example List
+- ChatQnA
+- DocSum
 ## Table of Contents
 - [Prerequisites](#prerequisites)
@@ -68,6 +73,7 @@ Before running the benchmarks, ensure you have:
 ```bash
 pip install -r requirements.txt
 ```
+notes: the benchmark needs `opea-eval>=1.3`; if v1.3 is not released yet, please build `opea-eval` from [source](https://github.com/opea-project/GenAIEval).
 ## Data Preparation
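
Regarding the `opea-eval>=1.3` note added in this hunk: one possible way to build `opea-eval` from source, assuming the GenAIEval repository installs as a standard Python package (these steps are illustrative, not part of this commit):

```bash
# Illustrative from-source install of opea-eval while the 1.3 release is not yet on PyPI
git clone https://github.com/opea-project/GenAIEval
cd GenAIEval
pip install -e .   # editable install; a plain "pip install ." should also work for a standard package
```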

View File

@@ -224,6 +224,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
             "modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
         }
     }
+    os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "")
     # Configure components
     values = configure_node_selectors(values, node_selector or {}, deploy_config)
@@ -338,17 +339,15 @@ def get_hw_values_file(deploy_config, chart_dir):
     version = deploy_config.get("version", "1.1.0")
     if os.path.isdir(chart_dir):
-        # Determine which values file to use based on version
-        if version in ["1.0.0", "1.1.0"]:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
-        else:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
+        hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
         if not os.path.exists(hw_values_file):
             print(f"Warning: {hw_values_file} not found")
-            hw_values_file = None
-        else:
-            print(f"Device-specific values file found: {hw_values_file}")
+            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
+            if not os.path.exists(hw_values_file):
+                print(f"Warning: {hw_values_file} not found")
+                print(f"Error: Can not found a correct values file for {device_type} with {llm_engine}")
+                sys.exit(1)
+        print(f"Device-specific values file found: {hw_values_file}")
     else:
         print(f"Error: Could not find directory for {chart_dir}")
         hw_values_file = None
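
Taken together, the added lines above replace the old version-based branching with a two-step lookup: try `<device>-<llm_engine>-values.yaml` first, fall back to `<device>-values.yaml`, and abort if neither exists. A minimal standalone sketch of that order (the function name and signature here are illustrative; the real `get_hw_values_file` also derives `device_type` and `llm_engine` from the deploy config and handles a missing `chart_dir`):

```python
import os
import sys


def resolve_hw_values_file(chart_dir: str, device_type: str, llm_engine: str) -> str:
    # Prefer the engine-specific values file, e.g. gaudi-vllm-values.yaml
    hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
    if not os.path.exists(hw_values_file):
        # Fall back to the device-only values file, e.g. gaudi-values.yaml
        hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
        if not os.path.exists(hw_values_file):
            # Neither file exists: stop instead of continuing with a bad deployment
            print(f"Error: no values file found for {device_type} with {llm_engine}")
            sys.exit(1)
    return hw_values_file
```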

View File

@@ -54,7 +54,7 @@ def construct_deploy_config(deploy_config, target_node, batch_param_value=None,
     # First determine which llm replicaCount to use based on teirerank.enabled
     services = new_config.get("services", {})
-    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", False)
     # Process each service's configuration
     for service_name, service_config in services.items():
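
The flipped default matters when a benchmark config does not define `teirerank.enabled` at all: the reranker is now treated as disabled rather than enabled when selecting the llm replicaCount. A small, self-contained illustration (the `services` dict below is hypothetical):

```python
# Hypothetical services mapping from a benchmark.yaml with no "teirerank" section
services = {"llm": {"engine": "vllm", "replicaCount": [1]}}

# Old default: a missing teirerank section counted as enabled
old_enabled = services.get("teirerank", {}).get("enabled", True)   # -> True
# New default: a missing teirerank section counts as disabled
new_enabled = services.get("teirerank", {}).get("enabled", False)  # -> False

print(old_enabled, new_enabled)  # True False
```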

View File

@@ -1,7 +1,7 @@
kubernetes
locust
numpy
opea-eval>=1.2
opea-eval>=1.3
prometheus_client
pytest
pyyaml