Update benchmark scripts (#1883)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1, 2, 4, 8]
@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
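For context, the `node` list in these configs drives a sweep: one deploy-and-benchmark round per node count. A minimal sketch of consuming such a section, assuming only the config shape shown above (the YAML literal and the loop are illustrative, not the repo's code):

```python
# Parse a deploy section like the ones above and sweep its node counts.
# Requires pyyaml, which is already in requirements.txt.
import yaml

config = yaml.safe_load(
    """
deploy:
  device: gaudi
  version: 1.3.0
  node: [1, 2, 4, 8]
"""
)

for node_count in config["deploy"]["node"]:
    # One deploy-and-benchmark round per entry in the node list.
    print(f"would deploy on {node_count} node(s) and run the benchmark")
```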
@@ -20,14 +20,10 @@ deploy:
     memory_capacity: "8000Mi"
   replicaCount: [1]
 
   teirerank:
     enabled: False
 
   llm:
     engine: vllm # or tgi
     model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-    replicaCount:
-      without_teirerank: [1] # When teirerank.enabled is False
+    replicaCount: [1]
     resources:
       enabled: False
       cards_per_instance: 1
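The hunk above flattens `llm.replicaCount` from a dict keyed by teirerank state to a plain list. A hypothetical helper (not from deploy.py; the `with_teirerank` key is an assumption for symmetry) shows how resolution logic could accept both shapes:

```python
# Hypothetical helper: accepts the old dict-shaped replicaCount
# ({"without_teirerank": [...]}) and the new flat list ([1]).
def resolve_replica_count(llm_config: dict, teirerank_enabled: bool) -> list:
    rc = llm_config.get("replicaCount", [1])
    if isinstance(rc, dict):  # old schema
        key = "with_teirerank" if teirerank_enabled else "without_teirerank"
        return rc.get(key, [1])
    return rc  # new schema: already a plain list


print(resolve_replica_count({"replicaCount": {"without_teirerank": [1]}}, False))  # [1]
print(resolve_replica_count({"replicaCount": [1]}, False))  # [1]
```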
@@ -78,7 +74,7 @@ benchmark:
 
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True
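The inline comment requires `dataset` to be an absolute path. A standalone illustration of that constraint (the guard itself is not part of the benchmark scripts; the path is the one from the diff):

```python
# Illustrates the "absolute path" requirement on the dataset field.
import os

dataset = "/home/sdp/pubmed_10.txt"
if not os.path.isabs(dataset):
    raise ValueError(f"dataset must be an absolute path, got {dataset!r}")
print(f"dataset path OK: {dataset}")
```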
@@ -1,4 +1,4 @@
-# ChatQnA Benchmarking
+# Deploy and Benchmark
 
 ## Purpose
@@ -8,6 +8,11 @@ We aim to run these benchmarks and share them with the OPEA community for three
 - To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
 - To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc.
 
+### Support Example List
+
+- ChatQnA
+- DocSum
+
 ## Table of Contents
 
 - [Prerequisites](#prerequisites)
@@ -68,6 +73,7 @@ Before running the benchmarks, ensure you have:
 ```bash
 pip install -r requirements.txt
 ```
+Note: the benchmark requires `opea-eval>=1.3`; if v1.3 has not been released yet, build `opea-eval` from [source](https://github.com/opea-project/GenAIEval).
 
 ## Data Preparation
deploy.py
@@ -224,6 +224,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
             "modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
         }
     }
+    os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "")
 
     # Configure components
     values = configure_node_selectors(values, node_selector or {}, deploy_config)
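Setting `os.environ["HF_TOKEN"]` in the parent process makes the token visible to any subprocess launched later (for example a `helm` invocation), since children inherit the parent's environment. A self-contained illustration of that mechanism, assuming nothing else about deploy.py:

```python
# The child process sees HF_TOKEN because it was set in this (parent)
# process before the spawn.
import os
import subprocess
import sys

os.environ["HF_TOKEN"] = "hf_dummy_value"  # placeholder, not a real token
subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ.get('HF_TOKEN'))"],
    check=True,
)
```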
@@ -338,17 +339,15 @@ def get_hw_values_file(deploy_config, chart_dir):
     version = deploy_config.get("version", "1.1.0")
 
     if os.path.isdir(chart_dir):
-        # Determine which values file to use based on version
-        if version in ["1.0.0", "1.1.0"]:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
-        else:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
-
+        hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
         if not os.path.exists(hw_values_file):
             print(f"Warning: {hw_values_file} not found")
-            hw_values_file = None
-        else:
-            print(f"Device-specific values file found: {hw_values_file}")
+            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
+            if not os.path.exists(hw_values_file):
+                print(f"Warning: {hw_values_file} not found")
+                print(f"Error: cannot find a valid values file for {device_type} with {llm_engine}")
+                sys.exit(1)
+        print(f"Device-specific values file found: {hw_values_file}")
     else:
         print(f"Error: Could not find directory for {chart_dir}")
         hw_values_file = None
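The rewritten lookup no longer keys off the version string: it prefers the engine-specific values file and falls back to the device-only file, exiting hard if neither exists. A condensed sketch of that selection order (function name and structure are illustrative, not the repo's code):

```python
# Engine-specific values file first, device-only file second, hard exit
# if neither exists.
import os
import sys


def pick_values_file(chart_dir: str, device_type: str, llm_engine: str) -> str:
    candidates = [
        os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml"),
        os.path.join(chart_dir, f"{device_type}-values.yaml"),
    ]
    for path in candidates:
        if os.path.exists(path):
            return path
        print(f"Warning: {path} not found")
    print(f"Error: no values file for {device_type} with {llm_engine}")
    sys.exit(1)
```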
@@ -54,7 +54,7 @@ def construct_deploy_config(deploy_config, target_node, batch_param_value=None,
 
     # First determine which llm replicaCount to use based on teirerank.enabled
     services = new_config.get("services", {})
-    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", False)
 
     # Process each service's configuration
     for service_name, service_config in services.items():
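The only change here is the fallback default on the chained `.get()` calls. It matters exactly when a config omits the `teirerank` section or leaves `enabled` unset, as a quick standalone check shows:

```python
# With no teirerank section present, the chained .get() falls through to
# the default, so flipping it from True to False changes behavior for
# configs that omit the section.
services = {}  # config with no "teirerank" block
old_behavior = services.get("teirerank", {}).get("enabled", True)   # -> True
new_behavior = services.get("teirerank", {}).get("enabled", False)  # -> False
print(old_behavior, new_behavior)
```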
@@ -1,7 +1,7 @@
 kubernetes
 locust
 numpy
-opea-eval>=1.2
+opea-eval>=1.3
 prometheus_client
 pytest
 pyyaml
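To confirm the installed package meets the new floor before a run, a stdlib-only check can help (the distribution name `opea-eval` is taken from the requirements line above):

```python
# Sanity check that the installed opea-eval is >= 1.3 using only stdlib.
from importlib.metadata import version

installed = version("opea-eval")
major, minor = (int(x) for x in installed.split(".")[:2])
assert (major, minor) >= (1, 3), f"opea-eval {installed} is older than 1.3"
print(f"opea-eval {installed} OK")
```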