add resume finetuning checkpoint ut. (#646)
* add resume finetuning checkpoint ut. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add final tuned model. --------- Co-authored-by: root <root@idc708073.jf.intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@@ -61,7 +62,7 @@ def update_job_status(job_id: FineTuningJobID):
|
||||
status = "cancelled" if status == "stopped" else status
|
||||
logger.info(f"Status of job {job_id} is '{status}'")
|
||||
running_finetuning_jobs[job_id].status = status
|
||||
if status == "finished" or status == "cancelled" or status == "failed":
|
||||
if status == "succeeded" or status == "cancelled" or status == "failed":
|
||||
break
|
||||
time.sleep(CHECK_JOB_STATUS_INTERVAL)
|
||||
|
||||
@@ -190,7 +191,21 @@ def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest):
|
||||
checkpoints = []
|
||||
if os.path.exists(output_dir):
|
||||
# Iterate over the contents of the directory and add an entry for each
|
||||
for _ in os.listdir(output_dir): # Loop over directory contents
|
||||
files = os.listdir(output_dir)
|
||||
for file in files: # Loop over directory contents
|
||||
file_path = os.path.join(output_dir, file)
|
||||
if os.path.isdir(file_path) and file.startswith("checkpoint"):
|
||||
steps = re.findall("\d+", file)[0]
|
||||
checkpointsResponse = FineTuningJobCheckpoint(
|
||||
id=f"ftckpt-{uuid.uuid4()}", # Generate a unique ID
|
||||
created_at=int(time.time()), # Use the current timestamp
|
||||
fine_tuned_model_checkpoint=file_path, # Directory path itself
|
||||
fine_tuning_job_id=fine_tuning_job_id,
|
||||
object="fine_tuning.job.checkpoint",
|
||||
step_number=steps,
|
||||
)
|
||||
checkpoints.append(checkpointsResponse)
|
||||
if job.status == "succeeded":
|
||||
checkpointsResponse = FineTuningJobCheckpoint(
|
||||
id=f"ftckpt-{uuid.uuid4()}", # Generate a unique ID
|
||||
created_at=int(time.time()), # Use the current timestamp
|
||||
@@ -199,7 +214,6 @@ def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest):
|
||||
object="fine_tuning.job.checkpoint",
|
||||
)
|
||||
checkpoints.append(checkpointsResponse)
|
||||
checkpoint_id_to_checkpoint_path[checkpointsResponse.id] = checkpointsResponse.fine_tuned_model_checkpoint
|
||||
|
||||
return checkpoints
|
||||
|
||||
|
||||
Reference in New Issue
Block a user