Add pgf generation scripts

Paul Brinkmeier 2024-03-09 17:47:50 +01:00
parent 6de64dd5d0
commit 1696d72a6f
7 changed files with 17714 additions and 2 deletions

File diff suppressed because it is too large


@@ -0,0 +1,10 @@
label nodes mean_time std_time speedup speedup_std
1 1 1132.1093720291667 18.428644622186628 1.0 0.016278148629010596
2 2 619.9614289104168 7.890810670766857 1.8260964621925766 0.029725469622481192
4 4 348.657283046875 3.3739994706945122 3.247054993762918 0.052856043795043856
8 8 200.79244575312498 1.259910347938215 5.63820699420684 0.09177957145282603
16 16 111.37033198671875 0.3943541234366863 10.16526889911915 0.16547175799372044
32 32 65.69918741145833 0.43464214827532205 17.231710415823503 0.28050034328084494
64 64 38.56257490957031 0.12227397957602136 29.35772247273364 0.4778893698204027
128 128 21.433497051595047 0.14016683538819355 52.81962944749218 0.8598057786755426
256 256 13.247708324598525 0.030540850807946187 85.45699711149668 1.3910817003698723
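As a sanity check on the columns above: the speedup and its quoted uncertainty are relative to the single-node row, e.g. for the 256-node row

\[ S(n) = \frac{\bar T_{\mathrm{TimeStep}}(1)}{\bar T_{\mathrm{TimeStep}}(n)}, \qquad S(256) = \frac{1132.11}{13.25} \approx 85.46 \]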


@@ -0,0 +1,35 @@
#!/usr/bin/env python
import argparse
import json


def print_table(data, spec):
    """Print a tab-separated pgf datafile: one column per entry in spec."""
    print("\t".join(column for column in spec.keys()))
    for data_item in data:
        values = []
        for retrieve in spec.values():
            raw_value = retrieve(data_item)
            values.append(raw_value if isinstance(raw_value, str) else str(raw_value))
        print("\t".join(values))


if __name__ == "__main__":
    p = argparse.ArgumentParser(description="Turn files generated by timing.py into pgf datafiles")
    p.add_argument("timing_file")
    args = p.parse_args()

    with open(args.timing_file, "r", encoding="utf8") as f:
        jobs = json.load(f)

    # Each column is computed by a function applied to one job record;
    # jobs[0] (the single-node run) serves as the speedup baseline.
    scaling_spec = {
        "label": lambda job: job["accounting"][0]["nodes"]["count"],
        "nodes": lambda job: job["accounting"][0]["nodes"]["count"],
        "mean_time": lambda job: job["means"]["TimeStep"],
        "std_time": lambda job: job["stds"]["TimeStep"],
        "speedup": lambda job: jobs[0]["means"]["TimeStep"] / job["means"]["TimeStep"],
        "speedup_std": lambda job: jobs[0]["stds"]["TimeStep"] / job["means"]["TimeStep"],
    }

    print_table(jobs, scaling_spec)
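Note what the speedup_std column actually is: it rescales only the baseline's standard deviation,

\[ \sigma_S(n) = \frac{\sigma_T(1)}{\bar T(n)} \qquad \text{(row 1: } 18.43 / 1132.11 \approx 0.0163\text{)} \]

Full first-order error propagation for a ratio would also include the n-node variance, \( \sigma_S = S \sqrt{(\sigma_1/\bar T_1)^2 + (\sigma_n/\bar T_n)^2} \); the simpler form appears deliberate here, since it reproduces the committed datafile above exactly.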


@@ -18,7 +18,8 @@ configs = [
     [ 8, 8, 12],
     [ 8, 8, 24],
     [ 8, 16, 24],
-    [ 16, 16, 24]
+    [ 16, 16, 24],
+    [ 16, 16, 48]
 ]

 out_path = Path(__file__).parent.parent / "generated" / "config"

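The new [ 16, 16, 48] entry splits the domain into 16 · 16 · 48 = 12288 blocks, presumably one MPI task per block; the matching Slurm batch entry added at the end of this commit requests exactly that many tasks.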
experiments/eval/scripts/substitute.py  Normal file → Executable file (0 lines changed)


@@ -0,0 +1,92 @@
#!/usr/bin/env python
import argparse
import json
import pandas
import subprocess
import sys
from pathlib import Path


def load_timing_file(timing_path: Path):
    # Timing files are whitespace-delimited tables; transpose the
    # "#Name"/"TotalTime" columns into a single wide row per run.
    df = pandas.read_csv(timing_path, delim_whitespace=True)
    headers = list(df["#Name"][1:])
    times = list(df["TotalTime"][1:])
    return pandas.DataFrame([times], columns=headers)


def load_all_timings(outdir_path: Path):
    timingdir_path = outdir_path / "timing"
    timing_paths = sorted(timingdir_path.iterdir())
    print(f"Loading {len(timing_paths)} timing files from {timingdir_path}...", file=sys.stderr)
    dfs = []
    for i, timing_path in enumerate(timing_paths, 1):
        dfs.append(load_timing_file(timing_path))
        # Erase the current line and redraw the progress counter in place.
        sys.stderr.write("\x1b[1K\r")
        sys.stderr.flush()
        print(f"[{i:8}/{len(timing_paths):8}] {i/len(timing_paths)*100:6.2f}%", file=sys.stderr, end="", flush=True)
    print(file=sys.stderr, flush=True)
    return pandas.concat(dfs)


def load_mean_timings(outdir_path: Path):
    df = load_all_timings(outdir_path)
    # Scale the raw counters down by 10^6 (presumably microseconds -> seconds).
    return df.mean() / 1000000


def get_outdirs(jobid: str):
    print(f"Globbing for {jobid}...", file=sys.stderr)
    return sorted(Path("/p/scratch/cellsinsilico/paul/nastja-out").glob(f"*{jobid}*"))


def load_array_mean_timings(jobid: str):
    # One row of mean timings per array task of the given job.
    mts = [
        load_mean_timings(outdir_path)
        for outdir_path in get_outdirs(jobid)
    ]
    return pandas.DataFrame(list(mts), columns=mts[0].index)


def get_mean_mean_totaltimes(jobid: str):
    return load_array_mean_timings(jobid).mean()


def get_std_mean_totaltimes(jobid: str):
    return load_array_mean_timings(jobid).std()


def get_accounting_data(jobid: str):
    sacct_results = subprocess.run(
        ["sacct", "--json", "--jobs", jobid],
        check=True,  # throw on non-zero exit code
        capture_output=True
    )
    return json.loads(sacct_results.stdout.decode("utf8"))


if __name__ == "__main__":
    p = argparse.ArgumentParser(description="Load and analyze data from nastja timing files")
    p.add_argument("jobid", nargs="+")
    p.add_argument("--prettify", action="store_true")
    args = p.parse_args()

    results = []
    for i, jobid in enumerate(args.jobid, 1):
        print(f"({i:2}/{len(args.jobid):2}) Loading accounting data for {jobid}", file=sys.stderr)
        accounting_data = get_accounting_data(jobid)
        array_mean_timings = load_array_mean_timings(jobid)
        results.append({
            "jobid": jobid,
            "means": array_mean_timings.mean().to_dict(),
            "stds": array_mean_timings.std().to_dict(),
            "accounting": [array_job["steps"][-1] for array_job in accounting_data["jobs"]]
        })

    print(json.dumps(results, indent=2 if args.prettify else None))
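For context, a minimal sketch of the timing-file layout that load_timing_file assumes, inferred purely from the parsing code above (not from NAStJA documentation); all names and numbers are invented:

# Hypothetical miniature of a timing file, parsed the same way as
# load_timing_file does it.
import io
import pandas

sample = io.StringIO(
    "#Name TotalTime\n"
    "All 120000000\n"      # first data row is dropped by the [1:] slices
    "TimeStep 113200000\n"
    "Sweep 90000000\n"
)
df = pandas.read_csv(sample, delim_whitespace=True)
headers = list(df["#Name"][1:])    # ["TimeStep", "Sweep"]
times = list(df["TotalTime"][1:])  # [113200000, 90000000]
print(pandas.DataFrame([times], columns=headers))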


@@ -6,7 +6,7 @@
   "logfile_path": "/p/project/cellsinsilico/paulslustigebude/ma/experiments/eval/logs/%x-%A.%a",
   "nastja_binary_path": "/p/project/cellsinsilico/paulslustigebude/nastja/build-nocuda/nastja",
   "config_path": "/p/project/cellsinsilico/paulslustigebude/ma/experiments/eval/generated/config/${SLURM_JOB_NAME}.json",
-  "output_dir_path": "/p/scratch/cellsinsilico/paul/nastja-out/${SLURM_JOB_NAME}-${SLURM_JOB_ID}.${SLURM_ARRAY_TASK_ID}"
+  "output_dir_path": "/p/scratch/cellsinsilico/paul/nastja-out/${SLURM_JOB_NAME}-${SLURM_ARRAY_JOB_ID}.${SLURM_ARRAY_TASK_ID}"
 },
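(With plain ${SLURM_JOB_ID}, every array task gets its own job id, so the per-task output directories share no common prefix; ${SLURM_ARRAY_JOB_ID} is identical for all tasks of one array job, which is presumably what lets get_outdirs() in the analysis script above collect them all with a single *jobid* glob.)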
"batches": [
{
@@ -56,6 +56,12 @@
       "nodes": 128,
       "tasks": 6144,
       "time": "00:10:00"
+    },
+    {
+      "name": "strong-cpu-16-16-48",
+      "nodes": 256,
+      "tasks": 12288,
+      "time": "00:10:00"
     }
   ]
 }
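Both strong-scaling entries keep the per-node task density constant, matching the block counts added to the config generator above:

\[ 16 \cdot 16 \cdot 24 = 6144 = 128 \cdot 48, \qquad 16 \cdot 16 \cdot 48 = 12288 = 256 \cdot 48 \]

i.e. 48 tasks per node in each case.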