Add booster strong scaling configs

This commit is contained in:
Paul Brinkmeier 2024-03-10 16:54:00 +01:00
parent 59d5935567
commit d41d8b564b
10 changed files with 400 additions and 20 deletions

View File

@ -4,6 +4,9 @@ generate-batch-strong-cpu:
# Run scripts/substitute.py with the strong-batch.j2 template, reading strong-gpu.json from stdin.
generate-batch-strong-gpu:
python scripts/substitute.py strong-batch.j2 < strong-gpu.json
# Same substitution step, reading strong-booster.json from stdin instead.
generate-batch-strong-booster:
python scripts/substitute.py strong-batch.j2 < strong-booster.json
# Remove everything matched by logs/* (rm is called without -f or -r).
clean-logs:
rm logs/*

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,9 @@
label nodes tasks mean_time std_time speedup speedup_std speedup_error
1 1 1 161.08257980000002 1.0725827874500897 1.0 0.0066585895804611986 0.004614167168572558
1 1 2 109.2771463 1.3498701967829918 1.474073813730255 0.018208823861030076 0.012618071172974463
1 1 4 94.5763083 2.921107861707173 1.7032022363258181 0.052605536545441436 0.03645377698685362
2 2 8 75.06436665 5.345617315509918 2.145926049720424 0.15281950626022228 0.10589851499069955
4 4 16 42.842002025 1.5314070839490197 3.759921856733072 0.13440013758310768 0.09313454370387199
8 8 32 16.40522446875 4.395440141154071 9.818980539208297 2.6307924825691393 1.823046180232846
16 16 64 9.361684956249999 5.010216046116417 17.206579857449583 9.208671612370724 6.381283859970849
32 32 128 5.214438346875 0.113661681579046 30.891645290338204 0.6733604113945479 0.4666149588180552
1 label nodes tasks mean_time std_time speedup speedup_std speedup_error
2 1 1 1 161.08257980000002 1.0725827874500897 1.0 0.0066585895804611986 0.004614167168572558
3 1 1 2 109.2771463 1.3498701967829918 1.474073813730255 0.018208823861030076 0.012618071172974463
4 1 1 4 94.5763083 2.921107861707173 1.7032022363258181 0.052605536545441436 0.03645377698685362
5 2 2 8 75.06436665 5.345617315509918 2.145926049720424 0.15281950626022228 0.10589851499069955
6 4 4 16 42.842002025 1.5314070839490197 3.759921856733072 0.13440013758310768 0.09313454370387199
7 8 8 32 16.40522446875 4.395440141154071 9.818980539208297 2.6307924825691393 1.823046180232846
8 16 16 64 9.361684956249999 5.010216046116417 17.206579857449583 9.208671612370724 6.381283859970849
9 32 32 128 5.214438346875 0.113661681579046 30.891645290338204 0.6733604113945479 0.4666149588180552

View File

@ -1,10 +1,10 @@
label nodes tasks mean_time std_time speedup speedup_std
1 1 48 1953.3832839708334 14.087268287606769 1.0 0.007211727674340593
2 2 96 1046.3540047312501 6.0880471399828595 1.866847429396085 0.013463195270347342
4 4 192 566.0709002166666 5.005298021298787 3.4507749527897755 0.024886049224955378
8 8 384 318.1225953708333 9.706195475255436 6.1403475024896865 0.04428251401377301
16 16 768 178.56471887994792 9.393026972005948 10.939357428631386 0.07889166670756431
32 32 1536 99.82801128216144 1.4413062581268035 19.56748670921274 0.14111538541812124
64 64 3072 55.75610567220052 1.0303316198245824 35.034428255357376 0.25265875580386077
128 128 6144 31.589132888118492 0.32226687791817415 61.83719226764697 0.4459529907801098
256 256 12288 19.11704500200738 0.18528928846832715 102.18018965617955 0.7368957015128405
label nodes tasks mean_time std_time speedup speedup_std speedup_error
1 1 48 1953.3832839708334 14.087268287606769 1.0 0.007211727674340593 0.004711662080569187
2 2 96 1046.3540047312501 6.0880471399828595 1.866847429396085 0.010861959816590314 0.007096480413505672
4 4 192 566.0709002166666 5.005298021298787 3.4507749527897755 0.030512356378918384 0.019934739500893344
8 8 384 318.1225953708333 9.706195475255436 6.1403475024896865 0.18734731205020752 0.12240024387280224
16 16 768 178.56471887994792 9.393026972005948 10.939357428631386 0.5754422263707719 0.375955587895571
32 32 1536 99.82801128216144 1.4413062581268035 19.56748670921274 0.28251330150299214 0.18457535698195485
64 64 3072 55.75610567220052 1.0303316198245824 35.034428255357376 0.6474103379133267 0.42297475410337343
128 128 6144 31.589132888118492 0.32226687791817415 61.83719226764697 0.6308523555205254 0.41215687227340997
256 256 12288 19.11704500200738 0.18528928846832715 102.18018965617955 0.9903672160087601 0.6470399144590566

1 label nodes tasks mean_time std_time speedup speedup_std speedup_error
2 1 1 48 1953.3832839708334 14.087268287606769 1.0 0.007211727674340593 0.004711662080569187
3 2 2 96 1046.3540047312501 6.0880471399828595 1.866847429396085 0.013463195270347342 0.010861959816590314 0.007096480413505672
4 4 4 192 566.0709002166666 5.005298021298787 3.4507749527897755 0.024886049224955378 0.030512356378918384 0.019934739500893344
5 8 8 384 318.1225953708333 9.706195475255436 6.1403475024896865 0.04428251401377301 0.18734731205020752 0.12240024387280224
6 16 16 768 178.56471887994792 9.393026972005948 10.939357428631386 0.07889166670756431 0.5754422263707719 0.375955587895571
7 32 32 1536 99.82801128216144 1.4413062581268035 19.56748670921274 0.14111538541812124 0.28251330150299214 0.18457535698195485
8 64 64 3072 55.75610567220052 1.0303316198245824 35.034428255357376 0.25265875580386077 0.6474103379133267 0.42297475410337343
9 128 128 6144 31.589132888118492 0.32226687791817415 61.83719226764697 0.4459529907801098 0.6308523555205254 0.41215687227340997
10 256 256 12288 19.11704500200738 0.18528928846832715 102.18018965617955 0.7368957015128405 0.9903672160087601 0.6470399144590566

View File

@ -1,9 +1,9 @@
label nodes tasks mean_time std_time speedup speedup_std
1 1 1 316.56674059999995 1.3298279828832311 1.0 0.004200782370133899
1 1 2 239.7297573 1.3705192585818793 1.3205150005797797 0.005547196133932895
1 1 4 150.86725470000002 1.463195211854327 2.098313124537819 0.008814556780579045
2 2 8 103.758313 1.3959932139830615 3.051001230137579 0.012816592178818781
4 4 16 56.423368849999996 0.8293394846143929 5.610560784514376 0.02356874483015261
8 8 32 24.168551143749998 0.5503559531334744 13.09829202078025 0.055023074199759194
16 16 64 15.05716248125 0.04648043269670786 21.024329185144023 0.08831863138484462
32 32 128 9.9111531140625 0.08848604506395903 31.940455056721632 0.1341749004963304
label nodes tasks mean_time std_time speedup speedup_std speedup_error
1 1 1 316.56674059999995 1.3298279828832311 1.0 0.004200782370133899 0.002910993666206366
1 1 2 239.7297573 1.3705192585818793 1.3205150005797797 0.007549297425250635 0.00523139621453709
1 1 4 150.86725470000002 1.463195211854327 2.098313124537819 0.020350616990413285 0.014102258089746373
2 2 8 103.758313 1.3959932139830615 3.051001230137579 0.041049019495199714 0.028445519245192213
4 4 16 56.423368849999996 0.8293394846143929 5.610560784514376 0.08246688711191473 0.057146637198176795
8 8 32 24.168551143749998 0.5503559531334744 13.09829202078025 0.29826872726631265 0.20668968287257275
16 16 64 15.05716248125 0.04648043269670786 21.024329185144023 0.06490066896072916 0.044973869063161365
32 32 128 9.9111531140625 0.08848604506395903 31.940455056721632 0.28516203039001964 0.19760720531719309

1 label nodes tasks mean_time std_time speedup speedup_std speedup_error
2 1 1 1 316.56674059999995 1.3298279828832311 1.0 0.004200782370133899 0.002910993666206366
3 1 1 2 239.7297573 1.3705192585818793 1.3205150005797797 0.005547196133932895 0.007549297425250635 0.00523139621453709
4 1 1 4 150.86725470000002 1.463195211854327 2.098313124537819 0.008814556780579045 0.020350616990413285 0.014102258089746373
5 2 2 8 103.758313 1.3959932139830615 3.051001230137579 0.012816592178818781 0.041049019495199714 0.028445519245192213
6 4 4 16 56.423368849999996 0.8293394846143929 5.610560784514376 0.02356874483015261 0.08246688711191473 0.057146637198176795
7 8 8 32 24.168551143749998 0.5503559531334744 13.09829202078025 0.055023074199759194 0.29826872726631265 0.20668968287257275
8 16 16 64 15.05716248125 0.04648043269670786 21.024329185144023 0.08831863138484462 0.06490066896072916 0.044973869063161365
9 32 32 128 9.9111531140625 0.08848604506395903 31.940455056721632 0.1341749004963304 0.28516203039001964 0.19760720531719309

View File

@ -3,6 +3,7 @@
import argparse
import json
import math
def print_table(data, spec):
@ -31,6 +32,9 @@ if __name__ == "__main__":
"mean_time": lambda job: job["means"]["TimeStep"],
"std_time": lambda job: job["stds"]["TimeStep"],
"speedup": lambda job: jobs[0]["means"]["TimeStep"] / job["means"]["TimeStep"],
"speedup_std": lambda job: jobs[0]["stds"]["TimeStep"] / job["means"]["TimeStep"],
# Standard deviation scaled to speedup
"speedup_std": lambda job: (jobs[0]["means"]["TimeStep"] / job["means"]["TimeStep"]) * (job["stds"]["TimeStep"] / job["means"]["TimeStep"]),
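# 95% confidence interval half-width (1.96 * scaled std / sqrt(n)). NOTE(review): n is len(jobs), i.e. the number of rows in the table, not the number of timing samples behind each job -- confirm this is the intended sample size.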
"speedup_error": lambda job: (jobs[0]["means"]["TimeStep"] / job["means"]["TimeStep"]) * (job["stds"]["TimeStep"] / job["means"]["TimeStep"]) / math.sqrt(len(jobs)) * 1.96,
}
print_table(jobs, scaling_spec)

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
"""Generate one config file per block decomposition for the strong-booster series.

Loads ``templates/strong-booster.json`` (relative to the parent of this
script's directory), overrides ``Geometry.blockcount`` and
``Geometry.blocksize`` for every entry of ``configs`` and writes the result to
``generated/config/strong-booster-XX-YY-ZZ.json``.
"""
import copy
import json
from pathlib import Path

# Total extent per axis; it is divided by the per-axis block count to obtain
# the block size, so the overall extent stays the same for every decomposition.
SIZE = [384, 384, 384]

with (Path(__file__).parent.parent / "templates" / "strong-booster.json").open(encoding="utf8") as f:
    template = json.load(f)

# Block counts per axis; the product of the three entries is printed below
# for each generated file.
configs = [
    [ 1,  1,  1],
    [ 1,  1,  2],
    [ 1,  2,  2],
    [ 2,  2,  2],
    [ 2,  2,  4],
    [ 2,  4,  4],
    [ 4,  4,  4],
    [ 4,  4,  8],
]

out_path = Path(__file__).parent.parent / "generated" / "config"
# The output directory may not exist yet (e.g. on a fresh checkout); without
# this the first open(..., "w") below fails with FileNotFoundError.
out_path.mkdir(parents=True, exist_ok=True)

for c in configs:
    # Floor division would silently shrink the domain if a block count did not
    # divide the extent, so reject such a decomposition explicitly.
    for bc, bs in zip(c, SIZE):
        if bs % bc != 0:
            raise ValueError(f"block count {bc} in {c} does not evenly divide size {bs}")

    # Deep copy so that edits to the nested "Geometry" dict never leak back
    # into the shared template between iterations.
    nc = copy.deepcopy(template)
    nc["Geometry"]["blockcount"] = c
    nc["Geometry"]["blocksize"] = [bs // bc for bc, bs in zip(c, SIZE)]

    nc_out_path = out_path / f"strong-booster-{c[0]:02}-{c[1]:02}-{c[2]:02}.json"
    print(f"Dumping {(c[0] * c[1] * c[2])} to {nc_out_path}")
    with nc_out_path.open("w", encoding="utf8") as f:
        json.dump(nc, f)

View File

@ -76,6 +76,7 @@ if __name__ == "__main__":
# Command-line interface: one or more job ids plus optional output flags.
# (Fixes the "analzye" typo in the user-visible description.)
p = argparse.ArgumentParser(description="Load and analyze data from nastja timing files")
# At least one job id is required.
p.add_argument("jobid", nargs="+")
p.add_argument("--prettify", action="store_true")
# When set, the array mean timings of each job are printed to stderr.
p.add_argument("--dump-timings", action="store_true")
args = p.parse_args()
results = []
@ -99,6 +100,8 @@ if __name__ == "__main__":
jobs.append(last_step)
array_mean_timings = load_array_mean_timings(jobid, excluded_array_indices)
if args.dump_timings:
print(array_mean_timings, file=sys.stderr)
results.append({
"jobid": jobid,

View File

@ -0,0 +1,63 @@
{
"common": {
"account": "hkf6",
"partition": "booster",
"extra_sbatch_line": "#SBATCH --gres=gpu:4",
"logfile_path": "/p/project/cellsinsilico/paulslustigebude/ma/experiments/eval/logs/%x-%A.%a",
"nastja_binary_path": "/p/project/cellsinsilico/paulslustigebude/nastja/build-cuda/nastja",
"config_path": "/p/project/cellsinsilico/paulslustigebude/ma/experiments/eval/generated/config/${SLURM_JOB_NAME}.json",
"output_dir_path": "/p/scratch/cellsinsilico/paul/nastja-out/${SLURM_JOB_NAME}-${SLURM_ARRAY_JOB_ID}.${SLURM_ARRAY_TASK_ID}"
},
"batches": [
{
"name": "strong-booster-01-01-01",
"nodes": 1,
"tasks": 1,
"time": "00:15:00",
"extra_sbatch_line": "#SBATCH --gres=gpu:1"
},
{
"name": "strong-booster-01-01-02",
"nodes": 1,
"tasks": 2,
"time": "00:15:00",
"extra_sbatch_line": "#SBATCH --gres=gpu:2"
},
{
"name": "strong-booster-01-02-02",
"nodes": 1,
"tasks": 4,
"time": "00:15:00"
},
{
"name": "strong-booster-02-02-02",
"nodes": 2,
"tasks": 8,
"time": "00:15:00"
},
{
"name": "strong-booster-02-02-04",
"nodes": 4,
"tasks": 16,
"time": "00:15:00"
},
{
"name": "strong-booster-02-04-04",
"nodes": 8,
"tasks": 32,
"time": "00:15:00"
},
{
"name": "strong-booster-04-04-04",
"nodes": 16,
"tasks": 64,
"time": "00:15:00"
},
{
"name": "strong-booster-04-04-08",
"nodes": 32,
"tasks": 128,
"time": "00:15:00"
}
]
}

View File

@ -0,0 +1,263 @@
{
"Application": "Cells",
"CellsInSilico": {
"2D": false,
"adhesion": {
"matrix": [
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 450.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 450.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0]
],
"polarityenabled": false
},
"centerofmass": {
"steps": 1
},
"cleaner": {
"killdistance": 0,
"steps": 100
},
"contactinhibition": {
"enabled": false
},
"division": {
"condition": [
"",
"",
"",
"",
"",
"",
"",
"",
"",
"( volume >= 0.9 * volume0 ) & ( rnd() <= 0.00001 ) & generation < 1"
],
"enabled": true,
"halveSignals": false
},
"dynamicecm": {
"alpha": 2.0,
"beta": 0.5,
"c": 4.0,
"deltat": 0.10000000149011612,
"ecmCellID": 0,
"enabled": true,
"eta": 0.25,
"k0": 0.10000000149011612,
"k1": 0.10000000149011612,
"lambda": 10.0,
"phi": 1.0,
"pushSteps": 10,
"pushWeight": 0.5,
"stepsPerMcs": 100
},
"ecmdegradation": {
"enabled": false
},
"energyfunctions": [
"Volume00",
"Surface01",
"Motility00",
"Adhesion01",
"DynamicECM00"
],
"liquid": 6,
"logcellproperties": {
"enabled": false
},
"orientation": {
"enabled": true,
"motility": "persistentRandomWalk",
"motilityamount": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"numRandomNumbers": 5,
"persistenceMagnitude": 0.0,
"persistentDecay": 0.8,
"recalculationtime": 200
},
"polarity": {
"enabled": false
},
"signaling": {
"constant": false,
"enabled": false
},
"surface": {
"default": {
"storage": "const",
"value": 400.0
},
"lambda": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
5.625,
5.625,
1.0
],
"sizechange": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
-0.05,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
]
},
"temperature": 50.0,
"visitor": {
"checkerboard": "01",
"stepwidth": 10
},
"volume": {
"default": {
"storage": "const",
"value": 500.0
},
"lambda": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
7.5,
7.5,
7.5
],
"sizechange": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
-0.05,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
]
}
},
"DefineFunctions": [
"r_angle()=360*rnd()",
"r_size()=400*rnd()"
],
"Filling": {
"cells": [
{
"box": [
[
0,
0,
0
],
[
384,
384,
384
]
],
"celltype": 0,
"component": 0,
"pattern": "const",
"seed": 0,
"shape": "cube",
"value": 0
},
{
"box": [
[117, 117, 117],
[267, 267, 267]
],
"celltype": 9,
"center": [192, 192, 192],
"component": 0,
"count": 5500,
"pattern": "voronoi",
"radius": 75,
"seed": 758960,
"shape": "sphere",
"value": 8
}
],
"initialoutput": false,
"randomseed": 758959
},
"Geometry": {
"blockcount": [
4,
4,
3
],
"blockdefault": "fill",
"blocksize": [
96,
96,
128
],
"blocktype": [
[
[
1
]
]
]
},
"Settings": {
"deltat": 1.0,
"deltax": 1.0,
"handleFPE": "signal",
"logger": {
"group": 0,
"steps": 100
},
"randomseed": 42,
"statusoutput": 1,
"timestepguard": 1,
"timesteps": 10,
"cuda": {
"subblocks": {
"blockDim": [8, 8, 8]
}
}
}
}