diff --git a/ChatQnA/benchmark_chatqna.yaml b/ChatQnA/benchmark_chatqna.yaml
index 407d555ce..ae74aa9b9 100644
--- a/ChatQnA/benchmark_chatqna.yaml
+++ b/ChatQnA/benchmark_chatqna.yaml
@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1, 2, 4, 8]
diff --git a/DocSum/benchmark_docsum.yaml b/DocSum/benchmark_docsum.yaml
index 66aab5ba6..908f2be3e 100644
--- a/DocSum/benchmark_docsum.yaml
+++ b/DocSum/benchmark_docsum.yaml
@@ -3,7 +3,7 @@
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
@@ -20,14 +20,10 @@ deploy:
         memory_capacity: "8000Mi"
       replicaCount: [1]
 
-    teirerank:
-      enabled: False
-
     llm:
       engine: vllm # or tgi
       model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-      replicaCount:
-        without_teirerank: [1] # When teirerank.enabled is False
+      replicaCount: [1]
       resources:
         enabled: False
        cards_per_instance: 1
@@ -78,7 +74,7 @@ benchmark:
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True
diff --git a/README-deploy-benchmark.md b/README-deploy-benchmark.md
index 9f1a13f8f..1b0f0ee53 100644
--- a/README-deploy-benchmark.md
+++ b/README-deploy-benchmark.md
@@ -1,4 +1,4 @@
-# ChatQnA Benchmarking
+# Deploy and Benchmark
 
 ## Purpose
 
@@ -8,6 +8,11 @@ We aim to run these benchmarks and share them with the OPEA community for three
 - To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
 - To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc.
 
+### Supported Examples
+
+- ChatQnA
+- DocSum
+
 ## Table of Contents
 
 - [Prerequisites](#prerequisites)
@@ -68,6 +73,7 @@ Before running the benchmarks, ensure you have:
   ```bash
   pip install -r requirements.txt
   ```
+  Note: the benchmark requires `opea-eval>=1.3`. If v1.3 has not been released yet, build `opea-eval` from [source](https://github.com/opea-project/GenAIEval).
 ## Data Preparation
diff --git a/deploy.py b/deploy.py
index 6c7da7474..e74700ca5 100644
--- a/deploy.py
+++ b/deploy.py
@@ -224,6 +224,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
             "modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
         }
     }
+    os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "")
 
     # Configure components
     values = configure_node_selectors(values, node_selector or {}, deploy_config)
@@ -338,17 +339,15 @@ def get_hw_values_file(deploy_config, chart_dir):
     version = deploy_config.get("version", "1.1.0")
 
     if os.path.isdir(chart_dir):
-        # Determine which values file to use based on version
-        if version in ["1.0.0", "1.1.0"]:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
-        else:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
-
+        hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
         if not os.path.exists(hw_values_file):
             print(f"Warning: {hw_values_file} not found")
-            hw_values_file = None
-        else:
-            print(f"Device-specific values file found: {hw_values_file}")
+            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
+            if not os.path.exists(hw_values_file):
+                print(f"Warning: {hw_values_file} not found")
+                print(f"Error: Could not find a valid values file for {device_type} with {llm_engine}")
+                sys.exit(1)
+        print(f"Device-specific values file found: {hw_values_file}")
     else:
         print(f"Error: Could not find directory for {chart_dir}")
         hw_values_file = None
diff --git a/deploy_and_benchmark.py b/deploy_and_benchmark.py
index f210f215d..bb729c7b4 100644
--- a/deploy_and_benchmark.py
+++ b/deploy_and_benchmark.py
@@ -54,7 +54,7 @@ def construct_deploy_config(deploy_config, target_node, batch_param_value=None,
 
     # First determine which llm replicaCount to use based on teirerank.enabled
     services = new_config.get("services", {})
-    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", False)
 
     # Process each service's configuration
     for service_name, service_config in services.items():
diff --git a/requirements.txt b/requirements.txt
index 637668c3d..f851f780c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 kubernetes
 locust
 numpy
-opea-eval>=1.2
+opea-eval>=1.3
 prometheus_client
 pytest
 pyyaml
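For reference, the reworked lookup in `get_hw_values_file` no longer branches on the chart version: it always prefers `{device}-{engine}-values.yaml`, falls back to `{device}-values.yaml`, and exits if neither exists. Below is a minimal standalone sketch of that fallback order; the `resolve_values_file` helper name and the example chart path are illustrative assumptions, not part of the patch.

```python
import os
import sys


def resolve_values_file(chart_dir: str, device_type: str, llm_engine: str) -> str:
    """Pick a Helm values file: prefer <device>-<engine>-values.yaml,
    fall back to <device>-values.yaml, exit if neither exists."""
    candidates = [
        os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml"),
        os.path.join(chart_dir, f"{device_type}-values.yaml"),
    ]
    for candidate in candidates:
        if os.path.exists(candidate):
            print(f"Device-specific values file found: {candidate}")
            return candidate
        print(f"Warning: {candidate} not found")
    print(f"Error: Could not find a valid values file for {device_type} with {llm_engine}")
    sys.exit(1)


if __name__ == "__main__":
    # Example invocation; the chart path is hypothetical.
    resolve_values_file("helm-charts/chatqna", "gaudi", "vllm")
```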