[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
@@ -1,8 +1,13 @@
|
|||||||
import os
|
# Copyright (C) 2024 Intel Corporation
|
||||||
import yaml
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
||||||
|
|
||||||
common_pods = [
|
common_pods = [
|
||||||
@@ -10,14 +15,11 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
"embedding-dependency-deploy",
|
"embedding-dependency-deploy",
|
||||||
"dataprep-deploy",
|
"dataprep-deploy",
|
||||||
"vector-db",
|
"vector-db",
|
||||||
"retriever-deploy"
|
"retriever-deploy",
|
||||||
]
|
]
|
||||||
|
|
||||||
if with_rerank:
|
if with_rerank:
|
||||||
pods_list = common_pods + [
|
pods_list = common_pods + ["reranking-dependency-deploy", "llm-dependency-deploy"]
|
||||||
"reranking-dependency-deploy",
|
|
||||||
"llm-dependency-deploy"
|
|
||||||
]
|
|
||||||
else:
|
else:
|
||||||
pods_list = common_pods + ["llm-dependency-deploy"]
|
pods_list = common_pods + ["llm-dependency-deploy"]
|
||||||
|
|
||||||
@@ -29,7 +31,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
{"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
|
{"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
|
||||||
{"name": "dataprep-deploy", "replicas": 1},
|
{"name": "dataprep-deploy", "replicas": 1},
|
||||||
{"name": "vector-db", "replicas": 1},
|
{"name": "vector-db", "replicas": 1},
|
||||||
{"name": "retriever-deploy", "replicas": 2}
|
{"name": "retriever-deploy", "replicas": 2},
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
replicas = [
|
replicas = [
|
||||||
@@ -39,58 +41,42 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
{"name": "llm-dependency-deploy", "replicas": (8 * num_nodes) - 1 if with_rerank else 8 * num_nodes},
|
{"name": "llm-dependency-deploy", "replicas": (8 * num_nodes) - 1 if with_rerank else 8 * num_nodes},
|
||||||
{"name": "dataprep-deploy", "replicas": 1},
|
{"name": "dataprep-deploy", "replicas": 1},
|
||||||
{"name": "vector-db", "replicas": 1},
|
{"name": "vector-db", "replicas": 1},
|
||||||
{"name": "retriever-deploy", "replicas": 1 * num_nodes}
|
{"name": "retriever-deploy", "replicas": 1 * num_nodes},
|
||||||
]
|
]
|
||||||
|
|
||||||
resources = [
|
resources = [
|
||||||
{"name": "chatqna-backend-server-deploy", "resources": {
|
{
|
||||||
'limits': {
|
"name": "chatqna-backend-server-deploy",
|
||||||
'cpu': "16",
|
"resources": {"limits": {"cpu": "16", "memory": "8000Mi"}, "requests": {"cpu": "16", "memory": "8000Mi"}},
|
||||||
'memory': "8000Mi"
|
},
|
||||||
},
|
{
|
||||||
'requests': {
|
"name": "embedding-dependency-deploy",
|
||||||
'cpu': "16",
|
"resources": {"limits": {"cpu": "80", "memory": "20000Mi"}, "requests": {"cpu": "80", "memory": "20000Mi"}},
|
||||||
'memory': "8000Mi"
|
},
|
||||||
}
|
(
|
||||||
}},
|
{"name": "reranking-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}}
|
||||||
{"name": "embedding-dependency-deploy", "resources": {
|
if with_rerank
|
||||||
'limits': {
|
else None
|
||||||
'cpu': "80",
|
),
|
||||||
'memory': "20000Mi"
|
{"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}},
|
||||||
},
|
{"name": "retriever-deploy", "resources": {"requests": {"cpu": "16", "memory": "8000Mi"}}},
|
||||||
'requests': {
|
|
||||||
'cpu': "80",
|
|
||||||
'memory': "20000Mi"
|
|
||||||
}
|
|
||||||
}},
|
|
||||||
{"name": "reranking-dependency-deploy", "resources": {
|
|
||||||
'limits': {
|
|
||||||
'habana.ai/gaudi': 1
|
|
||||||
}
|
|
||||||
}} if with_rerank else None,
|
|
||||||
{"name": "llm-dependency-deploy", "resources": {
|
|
||||||
'limits': {
|
|
||||||
'habana.ai/gaudi': 1
|
|
||||||
}
|
|
||||||
}},
|
|
||||||
{"name": "retriever-deploy", "resources": {
|
|
||||||
'requests': {
|
|
||||||
'cpu': "16",
|
|
||||||
'memory': "8000Mi"
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
]
|
]
|
||||||
|
|
||||||
replicas = [replica for replica in replicas if replica]
|
replicas = [replica for replica in replicas if replica]
|
||||||
resources = [resource for resource in resources if resource]
|
resources = [resource for resource in resources if resource]
|
||||||
|
|
||||||
tgi_params = [
|
tgi_params = [
|
||||||
{"name": "llm-dependency-deploy", "args": {
|
{
|
||||||
'--max-input-length': 1280,'--max-total-tokens': 2048,
|
"name": "llm-dependency-deploy",
|
||||||
'--max-batch-total-tokens': 35536,'--max-batch-prefill-tokens': 4096,
|
"args": {
|
||||||
}},
|
"--max-input-length": 1280,
|
||||||
|
"--max-total-tokens": 2048,
|
||||||
|
"--max-batch-total-tokens": 35536,
|
||||||
|
"--max-batch-prefill-tokens": 4096,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
replicas_dict = {item["name"]: item["replicas"] for item in replicas}
|
replicas_dict = {item["name"]: item["replicas"] for item in replicas}
|
||||||
resources_dict = {item["name"]: item["resources"] for item in resources}
|
resources_dict = {item["name"]: item["resources"] for item in resources}
|
||||||
tgi_params_dict = {item["name"]: item["args"] for item in tgi_params}
|
tgi_params_dict = {item["name"]: item["args"] for item in tgi_params}
|
||||||
@@ -99,12 +85,9 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
{"dict": replicas_dict, "key": "replicas"},
|
{"dict": replicas_dict, "key": "replicas"},
|
||||||
]
|
]
|
||||||
if mode == "tuned":
|
if mode == "tuned":
|
||||||
dicts_to_check.extend([
|
dicts_to_check.extend([{"dict": resources_dict, "key": "resources"}, {"dict": tgi_params_dict, "key": "args"}])
|
||||||
{"dict": resources_dict, "key": "resources"},
|
|
||||||
{"dict": tgi_params_dict, "key": "args"}
|
|
||||||
])
|
|
||||||
|
|
||||||
merged_specs = {'podSpecs': []}
|
merged_specs = {"podSpecs": []}
|
||||||
|
|
||||||
for pod in pods_list:
|
for pod in pods_list:
|
||||||
pod_spec = {"name": pod}
|
pod_spec = {"name": pod}
|
||||||
@@ -114,55 +97,71 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
|
|||||||
pod_spec[item["key"]] = item["dict"][pod]
|
pod_spec[item["key"]] = item["dict"][pod]
|
||||||
|
|
||||||
if len(pod_spec) > 1:
|
if len(pod_spec) > 1:
|
||||||
merged_specs['podSpecs'].append(pod_spec)
|
merged_specs["podSpecs"].append(pod_spec)
|
||||||
|
|
||||||
yaml_data = yaml.dump(merged_specs, default_flow_style=False)
|
yaml_data = yaml.dump(merged_specs, default_flow_style=False)
|
||||||
|
|
||||||
print(yaml_data)
|
print(yaml_data)
|
||||||
|
|
||||||
if with_rerank:
|
if with_rerank:
|
||||||
filename = f'{mode}_{num_nodes}_gaudi_with_rerank.yaml'
|
filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
|
||||||
else:
|
else:
|
||||||
filename = f'{mode}_{num_nodes}_gaudi_without_rerank.yaml'
|
filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
|
||||||
with open(filename, 'w') as file:
|
with open(filename, "w") as file:
|
||||||
file.write(yaml_data)
|
file.write(yaml_data)
|
||||||
|
|
||||||
current_dir = os.getcwd()
|
current_dir = os.getcwd()
|
||||||
filepath = os.path.join(current_dir, filename)
|
filepath = os.path.join(current_dir, filename)
|
||||||
print(f"YAML file {filepath} has been generated.")
|
print(f"YAML file {filepath} has been generated.")
|
||||||
|
|
||||||
return filepath
|
return filepath
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Generate a customized Helm values file and pass it to ``helm``.

    Parses the command-line options, calls ``generate_yaml`` to write the
    pod-spec YAML for the requested node count, mode and workflow, then runs
    ``helm template`` (when ``--template`` is given) or ``helm install``,
    supplying the workflow values file followed by the generated file.

    Raises:
        subprocess.CalledProcessError: if the ``helm`` command exits with a
            non-zero status (``check=True``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", help="The name of example pipelines", default="chatqna")
    parser.add_argument("--folder", help="The path of helmcharts folder", default=".")
    # The option is required, so no default is declared: argparse rejects the
    # command line before a default could ever be used.
    parser.add_argument(
        "--num_nodes",
        help="Number of nodes to deploy",
        type=int,
        choices=[1, 2, 4, 8],
        required=True,
    )
    parser.add_argument(
        "--mode",
        help="set up your chatqna in the specified mode",
        type=str,
        choices=["oob", "tuned"],
        default="oob",
    )
    parser.add_argument(
        "--workflow",
        help="with rerank in the pipeline",
        type=str,
        choices=["with_rerank", "without_rerank"],
        default="with_rerank",
    )
    parser.add_argument("--template", help="helm template", action="store_true")
    args = parser.parse_args()

    # ``choices`` restricts --workflow to exactly two values, so a single
    # comparison decides both the flag and the matching base values file.
    with_rerank = args.workflow == "with_rerank"
    if with_rerank:
        workflow_file = "./hpu_with_rerank.yaml"
    else:
        workflow_file = "./hpu_without_rerank.yaml"

    customize_filepath = generate_yaml(args.num_nodes, mode=args.mode, with_rerank=with_rerank)

    # The generated file is passed last, after the workflow values file.
    values_args = ["-f", workflow_file, "-f", customize_filepath]
    if args.template:
        command = ["helm", "template", args.folder] + values_args
    else:
        command = ["helm", "install", args.name, args.folder] + values_args

    # The argument list is passed directly (no shell); helm's output goes to
    # this process's stdout/stderr because nothing is captured.
    subprocess.run(
        command,
        check=True,
        text=True,
        capture_output=False,
    )
|
||||||
|
|
||||||
|
|
||||||
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
|
||||||
|
|||||||
Reference in New Issue
Block a user