#!/usr/bin/env python
"""Print a CSV breakdown of mean action timings for one or more jobs.

Every positional argument has the form ``LABEL:JOBID``. One CSV row is written
to stdout per label. Timing entries that are not selected as columns are
summed into the trailing "Other" column. All diagnostics go to stderr so that
stdout stays valid CSV.
"""

import argparse
import sys


# Timing entries dropped from the output entirely (not even counted as "Other").
ignore = ["TimeStep"]

# Entries that always get their own column, in output order.
DEFAULT_COLUMNS = [
    "BC:cells",
    "BC:dynamicecm",
    "Sweep:Cells",
    "Sweep:DynamicECM",
    "Sweep:DynamicECMDampers",
]


def parse_job_argument(jobarg):
    """Split a ``LABEL:JOBID`` command line argument into ``(label, jobid)``.

    Raises:
        argparse.ArgumentTypeError: if the argument does not consist of exactly
            two non-empty parts separated by a single colon. argparse turns
            this into a regular usage error instead of a traceback.
    """
    parts = jobarg.split(":")
    if len(parts) != 2 or not all(parts):
        raise argparse.ArgumentTypeError(f"expected LABEL:JOBID, got {jobarg!r}")
    label, jobid = parts
    return label, jobid


def split_timings(timings, columns):
    """Distribute timing entries over the requested columns and "Other".

    Args:
        timings: mapping-like object with an ``items()`` method yielding
            ``(name, value)`` pairs (a dict or a pandas Series both work).
        columns: names that are reported as their own column.

    Returns:
        A dict with one key per requested column plus "Other". Columns that do
        not occur in ``timings`` are reported as 0 rather than being left out,
        so every row can be printed with the same header.
    """
    values = dict.fromkeys(columns, 0)
    values["Other"] = 0
    for name, value in timings.items():
        if name in ignore:
            continue
        if name in columns:
            values[name] = value
        else:
            values["Other"] += value
            print(f"Others+= {name}={value}", file=sys.stderr)
    return values


def normalize_rows(values_by_label):
    """Scale every row in place so that its values sum to 100.

    Rows whose total is zero cannot be scaled; they are left untouched and a
    warning is written to stderr.

    Returns:
        The same ``values_by_label`` mapping, for convenience.
    """
    for label, values in values_by_label.items():
        total = sum(values.values())
        if not total:
            print(f"WARNING: {label} has a total of 0, cannot normalize it", file=sys.stderr)
            continue
        scale = 100 / total
        for name in values:
            values[name] *= scale
    return values_by_label


def format_row(label, values, columns):
    """Return one CSV line: the label, the requested columns, then "Other"."""
    return ",".join([label] + [f"{values[c]}" for c in columns + ["Other"]])


def main(argv=None):
    """Parse the command line, load the timings and print the CSV table."""
    # Project-local module; imported here rather than at module level so the
    # pure helpers above can be imported and tested without it.
    import timing

    p = argparse.ArgumentParser()
    p.add_argument("jobs", nargs="+", type=parse_job_argument, metavar="LABEL:JOBID")
    p.add_argument("--normalize", action="store_true")
    p.add_argument("--extra-columns", nargs="*")
    args = p.parse_args(argv)

    columns = DEFAULT_COLUMNS + (args.extra_columns or [])

    mean_timings_by_label = {}
    for label, jobid in args.jobs:
        # Only the excluded indices are needed here, not the job steps.
        _, excluded_array_indices = timing.get_jobs(jobid)
        mean_timings_by_label[label] = timing.load_array_mean_timings(
            jobid, excluded_array_indices
        ).mean()

    print(",".join(["label"] + columns + ["Other"]))

    values_by_label = {
        label: split_timings(mean_timings, columns)
        for label, mean_timings in mean_timings_by_label.items()
    }

    if args.normalize:
        print("Normalizing data to 100%...", file=sys.stderr)
        normalize_rows(values_by_label)

    for label, values in values_by_label.items():
        print(format_row(label, values, columns))


if __name__ == "__main__":
    main()
def select_completed_steps(array_jobs):
    """Pick the final step of every array task whose final step completed.

    Args:
        array_jobs: iterable of job records as found under the ``"jobs"`` key
            of the accounting data. Each record is read for
            ``["array"]["job_id"]``, ``["array"]["task_id"]`` and ``["steps"]``.

    Returns:
        A tuple ``(jobs, excluded_array_indices)``: the last step of every
        usable array task, and the task ids of the tasks that were skipped.
        A task is skipped, with a warning on stderr, when it has no steps at
        all or when its last step is not in state ``"COMPLETED"``.
    """
    jobs = []
    excluded_array_indices = []
    for array_job in array_jobs:
        # Get metadata related to array
        array_main_job = array_job["array"]["job_id"]
        array_index = array_job["array"]["task_id"]

        # A task without any recorded step has no measurement to use;
        # indexing [-1] on it would raise instead of excluding it.
        steps = array_job.get("steps")
        if not steps:
            print(f"WARNING: {array_main_job}.{array_index} has no job steps, excluding it from measurements", file=sys.stderr)
            excluded_array_indices.append(array_index)
            continue

        # The last step is the actual job we want the data for
        # The steps before set up cluster etc.
        last_step = steps[-1]
        if last_step["state"] != "COMPLETED":
            print(f"WARNING: {array_main_job}.{array_index} has state {last_step['state']}, excluding it from measurements", file=sys.stderr)
            excluded_array_indices.append(array_index)
            continue
        jobs.append(last_step)

    return jobs, excluded_array_indices


def get_jobs(jobid: str):
    """Load the accounting data for ``jobid`` and select its usable steps.

    Returns:
        The ``(jobs, excluded_array_indices)`` tuple produced by
        ``select_completed_steps`` for the ``"jobs"`` entry of the accounting
        data.
    """
    accounting_data = get_accounting_data(jobid)
    return select_completed_steps(accounting_data["jobs"])