[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
2024-10-22 05:38:40 +00:00
parent 0d3876d6fa
commit 8effe7a4eb
1 changed files with 78 additions and 79 deletions
--- a/ChatQnA/benchmark/performance/helm_charts/deployment.py
+++ b/ChatQnA/benchmark/performance/helm_charts/deployment.py
@@ -1,8 +1,13 @@
-import os
+# Copyright (C) 2024 Intel Corporation
-import yaml
+# SPDX-License-Identifier: Apache-2.0
 import argparse
 import os
 import subprocess
 import yaml
 def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
    common_pods = [
@@ -10,14 +15,11 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
        "embedding-dependency-deploy",
        "dataprep-deploy",
        "vector-db",
-        "retriever-deploy"
+        "retriever-deploy",
    ]
    if with_rerank:
-        pods_list = common_pods + [
+        pods_list = common_pods + ["reranking-dependency-deploy", "llm-dependency-deploy"]
            "reranking-dependency-deploy",
            "llm-dependency-deploy"
        ]
    else:
        pods_list = common_pods + ["llm-dependency-deploy"]
@@ -29,7 +31,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
            {"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
            {"name": "dataprep-deploy", "replicas": 1},
            {"name": "vector-db", "replicas": 1},
-            {"name": "retriever-deploy", "replicas": 2}
+            {"name": "retriever-deploy", "replicas": 2},
        ]
    else:
        replicas = [
@@ -39,58 +41,42 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
            {"name": "llm-dependency-deploy", "replicas": (8 * num_nodes) - 1 if with_rerank else 8 * num_nodes},
            {"name": "dataprep-deploy", "replicas": 1},
            {"name": "vector-db", "replicas": 1},
-            {"name": "retriever-deploy", "replicas": 1 * num_nodes}
+            {"name": "retriever-deploy", "replicas": 1 * num_nodes},
        ]
    resources = [
-        {"name": "chatqna-backend-server-deploy", "resources": {
+        {
-            'limits': {
+            "name": "chatqna-backend-server-deploy",
-                'cpu': "16",
+            "resources": {"limits": {"cpu": "16", "memory": "8000Mi"}, "requests": {"cpu": "16", "memory": "8000Mi"}},
-                'memory': "8000Mi"
+        },
-            },
+        {
-            'requests': {
+            "name": "embedding-dependency-deploy",
-                'cpu': "16",
+            "resources": {"limits": {"cpu": "80", "memory": "20000Mi"}, "requests": {"cpu": "80", "memory": "20000Mi"}},
-                'memory': "8000Mi"
+        },
-            }
+        (
-        }},
+            {"name": "reranking-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}}
-        {"name": "embedding-dependency-deploy", "resources": {
+            if with_rerank
-            'limits': {
+            else None
-                'cpu': "80",
+        ),
-                'memory': "20000Mi"
+        {"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}},
-            },
+        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "16", "memory": "8000Mi"}}},
            'requests': {
                'cpu': "80",
                'memory': "20000Mi"
            }
        }},
        {"name": "reranking-dependency-deploy", "resources": {
            'limits': {
                'habana.ai/gaudi': 1
            }
        }} if with_rerank else None,
        {"name": "llm-dependency-deploy", "resources": {
            'limits': {
                'habana.ai/gaudi': 1
            }
        }},
        {"name": "retriever-deploy", "resources": {
            'requests': {
                'cpu': "16",
                'memory': "8000Mi"
            }
        }}
    ]
    replicas = [replica for replica in replicas if replica]
    resources = [resource for resource in resources if resource]
    tgi_params = [
-        {"name": "llm-dependency-deploy", "args": {
+        {
-            '--max-input-length': 1280,'--max-total-tokens': 2048,
+            "name": "llm-dependency-deploy",
-            '--max-batch-total-tokens': 35536,'--max-batch-prefill-tokens': 4096,
+            "args": {
-        }},
+                "--max-input-length": 1280,
                "--max-total-tokens": 2048,
                "--max-batch-total-tokens": 35536,
                "--max-batch-prefill-tokens": 4096,
            },
        },
    ]
-    
+
    replicas_dict = {item["name"]: item["replicas"] for item in replicas}
    resources_dict = {item["name"]: item["resources"] for item in resources}
    tgi_params_dict = {item["name"]: item["args"] for item in tgi_params}
@@ -99,12 +85,9 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
        {"dict": replicas_dict, "key": "replicas"},
    ]
    if mode == "tuned":
-        dicts_to_check.extend([
+        dicts_to_check.extend([{"dict": resources_dict, "key": "resources"}, {"dict": tgi_params_dict, "key": "args"}])
            {"dict": resources_dict, "key": "resources"},
            {"dict": tgi_params_dict, "key": "args"}
        ])
-    merged_specs = {'podSpecs': []}
+    merged_specs = {"podSpecs": []}
    for pod in pods_list:
        pod_spec = {"name": pod}
@@ -114,55 +97,71 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
                pod_spec[item["key"]] = item["dict"][pod]
        if len(pod_spec) > 1:
-            merged_specs['podSpecs'].append(pod_spec)
+            merged_specs["podSpecs"].append(pod_spec)
    yaml_data = yaml.dump(merged_specs, default_flow_style=False)
    print(yaml_data)
-    
+
    if with_rerank:
-        filename = f'{mode}_{num_nodes}_gaudi_with_rerank.yaml'
+        filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
    else:
-        filename = f'{mode}_{num_nodes}_gaudi_without_rerank.yaml'
+        filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
-    with open(filename, 'w') as file:
+    with open(filename, "w") as file:
        file.write(yaml_data)
-        
+
    current_dir = os.getcwd()
    filepath = os.path.join(current_dir, filename)
    print(f"YAML file {filepath} has been generated.")
-    
+
    return filepath
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", help="The name of example pipelines", default="chatqna")
    parser.add_argument("--folder", help="The path of helmcharts folder", default=".")
-    parser.add_argument("--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default = 1, required=True)
+    parser.add_argument(
-    parser.add_argument("--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default = "oob")
+        "--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default=1, required=True
-    parser.add_argument("--workflow", help="with rerank in the pipeline", type=str, choices=["with_rerank", "without_rerank"], default = "with_rerank")
+    )
-    
+    parser.add_argument(
        "--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default="oob"
    )
    parser.add_argument(
        "--workflow",
        help="with rerank in the pipeline",
        type=str,
        choices=["with_rerank", "without_rerank"],
        default="with_rerank",
    )
    parser.add_argument("--template", help="helm template", action="store_true")
    args = parser.parse_args()
    if args.workflow == "with_rerank":
        with_rerank = True
-        workflow_file = './hpu_with_rerank.yaml'
+        workflow_file = "./hpu_with_rerank.yaml"
    else:
        with_rerank = False
-        workflow_file = './hpu_without_rerank.yaml'
+        workflow_file = "./hpu_without_rerank.yaml"
-    customize_filepath = generate_yaml(args.num_nodes, mode = args.mode, with_rerank = with_rerank)
+    customize_filepath = generate_yaml(args.num_nodes, mode=args.mode, with_rerank=with_rerank)
    if args.template:
-        subprocess.run(['helm', 'template', args.folder, '-f', workflow_file, '-f', customize_filepath],
+        subprocess.run(
-                    check=True,
+            ["helm", "template", args.folder, "-f", workflow_file, "-f", customize_filepath],
-                    text=True,
+            check=True,
-                    capture_output=False)
+            text=True,
            capture_output=False,
        )
    else:
-        subprocess.run(['helm', 'install', args.name, args.folder, '-f', workflow_file, '-f', customize_filepath],
+        subprocess.run(
-                    check=True,
+            ["helm", "install", args.name, args.folder, "-f", workflow_file, "-f", customize_filepath],
-                    text=True,
+            check=True,
-                    capture_output=False)
+            text=True,
-    
+            capture_output=False,
        )
 if __name__ == "__main__":
-    main()
+    main()