diff --git a/ChatQnA/benchmark/performance/helm_charts/deployment.py b/ChatQnA/benchmark/performance/helm_charts/deployment.py
index f67c980f5..7613047f1 100644
--- a/ChatQnA/benchmark/performance/helm_charts/deployment.py
+++ b/ChatQnA/benchmark/performance/helm_charts/deployment.py
@@ -1,8 +1,13 @@
-import os
-import yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
+import os
 import subprocess
+import yaml
+
+
 
 
 def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
     common_pods = [
@@ -10,14 +15,11 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
         "embedding-dependency-deploy",
         "dataprep-deploy",
         "vector-db",
-        "retriever-deploy"
+        "retriever-deploy",
     ]
 
     if with_rerank:
-        pods_list = common_pods + [
-            "reranking-dependency-deploy",
-            "llm-dependency-deploy"
-        ]
+        pods_list = common_pods + ["reranking-dependency-deploy", "llm-dependency-deploy"]
     else:
         pods_list = common_pods + ["llm-dependency-deploy"]
 
@@ -29,7 +31,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
         {"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
         {"name": "dataprep-deploy", "replicas": 1},
         {"name": "vector-db", "replicas": 1},
-        {"name": "retriever-deploy", "replicas": 2}
+        {"name": "retriever-deploy", "replicas": 2},
     ]
     else:
         replicas = [
@@ -39,58 +41,42 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
         {"name": "llm-dependency-deploy", "replicas": (8 * num_nodes) - 1 if with_rerank else 8 * num_nodes},
         {"name": "dataprep-deploy", "replicas": 1},
         {"name": "vector-db", "replicas": 1},
-        {"name": "retriever-deploy", "replicas": 1 * num_nodes}
+        {"name": "retriever-deploy", "replicas": 1 * num_nodes},
     ]
 
     resources = [
-        {"name": "chatqna-backend-server-deploy", "resources": {
-            'limits': {
-                'cpu': "16",
-                'memory': "8000Mi"
-            },
-            'requests': {
-                'cpu': "16",
-                'memory': "8000Mi"
-            }
-        }},
-        {"name": "embedding-dependency-deploy", "resources": {
-            'limits': {
-                'cpu': "80",
-                'memory': "20000Mi"
-            },
-            'requests': {
-                'cpu': "80",
-                'memory': "20000Mi"
-            }
-        }},
-        {"name": "reranking-dependency-deploy", "resources": {
-            'limits': {
-                'habana.ai/gaudi': 1
-            }
-        }} if with_rerank else None,
-        {"name": "llm-dependency-deploy", "resources": {
-            'limits': {
-                'habana.ai/gaudi': 1
-            }
-        }},
-        {"name": "retriever-deploy", "resources": {
-            'requests': {
-                'cpu': "16",
-                'memory': "8000Mi"
-            }
-        }}
+        {
+            "name": "chatqna-backend-server-deploy",
+            "resources": {"limits": {"cpu": "16", "memory": "8000Mi"}, "requests": {"cpu": "16", "memory": "8000Mi"}},
+        },
+        {
+            "name": "embedding-dependency-deploy",
+            "resources": {"limits": {"cpu": "80", "memory": "20000Mi"}, "requests": {"cpu": "80", "memory": "20000Mi"}},
+        },
+        (
+            {"name": "reranking-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}}
+            if with_rerank
+            else None
+        ),
+        {"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}},
+        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "16", "memory": "8000Mi"}}},
     ]
 
     replicas = [replica for replica in replicas if replica]
     resources = [resource for resource in resources if resource]
 
     tgi_params = [
-        {"name": "llm-dependency-deploy", "args": {
-            '--max-input-length': 1280,'--max-total-tokens': 2048,
-            '--max-batch-total-tokens': 35536,'--max-batch-prefill-tokens': 4096,
-        }},
+        {
+            "name": "llm-dependency-deploy",
+            "args": {
+                "--max-input-length": 1280,
+                "--max-total-tokens": 2048,
+                "--max-batch-total-tokens": 35536,
+                "--max-batch-prefill-tokens": 4096,
+            },
+        },
     ]
-    
+
     replicas_dict = {item["name"]: item["replicas"] for item in replicas}
     resources_dict = {item["name"]: item["resources"] for item in resources}
     tgi_params_dict = {item["name"]: item["args"] for item in tgi_params}
@@ -99,12 +85,9 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
         {"dict": replicas_dict, "key": "replicas"},
     ]
     if mode == "tuned":
-        dicts_to_check.extend([
-            {"dict": resources_dict, "key": "resources"},
-            {"dict": tgi_params_dict, "key": "args"}
-        ])
+        dicts_to_check.extend([{"dict": resources_dict, "key": "resources"}, {"dict": tgi_params_dict, "key": "args"}])
 
-    merged_specs = {'podSpecs': []}
+    merged_specs = {"podSpecs": []}
 
     for pod in pods_list:
         pod_spec = {"name": pod}
@@ -114,55 +97,71 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
                 pod_spec[item["key"]] = item["dict"][pod]
 
         if len(pod_spec) > 1:
-            merged_specs['podSpecs'].append(pod_spec)
+            merged_specs["podSpecs"].append(pod_spec)
 
     yaml_data = yaml.dump(merged_specs, default_flow_style=False)
     print(yaml_data)
-    
+
     if with_rerank:
-        filename = f'{mode}_{num_nodes}_gaudi_with_rerank.yaml'
+        filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
     else:
-        filename = f'{mode}_{num_nodes}_gaudi_without_rerank.yaml'
-    with open(filename, 'w') as file:
+        filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
+    with open(filename, "w") as file:
         file.write(yaml_data)
-    
+
     current_dir = os.getcwd()
     filepath = os.path.join(current_dir, filename)
     print(f"YAML file {filepath} has been generated.")
-    
+
     return filepath
 
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--name", help="The name of example pipelines", default="chatqna")
     parser.add_argument("--folder", help="The path of helmcharts folder", default=".")
-    parser.add_argument("--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default = 1, required=True)
-    parser.add_argument("--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default = "oob")
-    parser.add_argument("--workflow", help="with rerank in the pipeline", type=str, choices=["with_rerank", "without_rerank"], default = "with_rerank")
-    
+    parser.add_argument(
+        "--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default=1, required=True
+    )
+    parser.add_argument(
+        "--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default="oob"
+    )
+    parser.add_argument(
+        "--workflow",
+        help="with rerank in the pipeline",
+        type=str,
+        choices=["with_rerank", "without_rerank"],
+        default="with_rerank",
+    )
+
    parser.add_argument("--template", help="helm template", action="store_true")
 
     args = parser.parse_args()
 
     if args.workflow == "with_rerank":
         with_rerank = True
-        workflow_file = './hpu_with_rerank.yaml'
+        workflow_file = "./hpu_with_rerank.yaml"
     else:
         with_rerank = False
-        workflow_file = './hpu_without_rerank.yaml'
 
-    customize_filepath = generate_yaml(args.num_nodes, mode = args.mode, with_rerank = with_rerank)
+        workflow_file = "./hpu_without_rerank.yaml"
+
+    customize_filepath = generate_yaml(args.num_nodes, mode=args.mode, with_rerank=with_rerank)
 
     if args.template:
-        subprocess.run(['helm', 'template', args.folder, '-f', workflow_file, '-f', customize_filepath],
-                       check=True,
-                       text=True,
-                       capture_output=False)
+        subprocess.run(
+            ["helm", "template", args.folder, "-f", workflow_file, "-f", customize_filepath],
+            check=True,
+            text=True,
+            capture_output=False,
+        )
     else:
-        subprocess.run(['helm', 'install', args.name, args.folder, '-f', workflow_file, '-f', customize_filepath],
-                       check=True,
-                       text=True,
-                       capture_output=False)
-
+        subprocess.run(
+            ["helm", "install", args.name, args.folder, "-f", workflow_file, "-f", customize_filepath],
+            check=True,
+            text=True,
+            capture_output=False,
+        )
+
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
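
Worked example (illustrative, not part of the patch): for a tuned two-node run with rerank, the replica table above yields 8 * 2 - 1 = 15 llm-dependency-deploy replicas and 1 * 2 = 2 retriever-deploy replicas, and generate_yaml() writes tuned_2_gaudi_with_rerank.yaml. With --template, main() then reduces to roughly the sketch below; the relative override path and the "." chart folder are assumptions taken from the script's defaults, and helm must be on PATH.

# Sketch of the call made by:
#   python deployment.py --num_nodes 2 --mode tuned --workflow with_rerank --template
import subprocess

workflow_file = "./hpu_with_rerank.yaml"               # selected by --workflow
customize_filepath = "tuned_2_gaudi_with_rerank.yaml"  # written by generate_yaml()

# --template renders the chart to stdout; without it the script runs
# `helm install chatqna . -f <workflow> -f <overrides>` instead.
subprocess.run(
    ["helm", "template", ".", "-f", workflow_file, "-f", customize_filepath],
    check=True,
    text=True,
)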