Add GPU strong scaling measurement files

This commit is contained in:
Paul Brinkmeier 2024-03-09 18:49:03 +01:00
parent 1696d72a6f
commit 075d11b948
7 changed files with 12684 additions and 0 deletions

View File

@ -1,6 +1,9 @@
# None of these targets produce a file of the same name, so mark them phony:
# they then always run, even if a file called e.g. "clean-logs" exists.
.PHONY: generate-batch-strong-cpu generate-batch-strong-gpu clean-logs

# Run scripts/substitute.py on the strong-batch.j2 template with the CPU
# parameter file strong-cpu.json supplied on stdin.
generate-batch-strong-cpu:
	python scripts/substitute.py strong-batch.j2 < strong-cpu.json

# Same template as above, but with the GPU parameter file strong-gpu.json.
generate-batch-strong-gpu:
	python scripts/substitute.py strong-batch.j2 < strong-gpu.json

# Delete everything directly inside logs/. -f keeps the target from failing
# when the directory is already empty (the unmatched glob is passed literally).
clean-logs:
	rm -f logs/*

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
label nodes tasks mean_time std_time speedup speedup_std
1 1 1 157.5217688 0.5311809049610176 1.0 0.003372111099358209
1 1 2 120.15237200000001 0.299200156243067 1.3110167213344732 0.004420894037456185
1 1 4 75.69762635 0.5769735213369493 2.0809340582447473 0.007017140834839633
2 2 8 52.110670325 0.940887445857671 3.022831366735062 0.010193323203255446
4 4 16 28.215493675000005 0.5598524922936691 5.582811012077745 0.018825858979446598
8 8 32 12.422501156249998 0.31940690526235005 12.680358554102268 0.04275957782413007
16 16 64 7.70770194375 0.04068644910316383 20.43693048194875 0.06891560011499148
1 label nodes tasks mean_time std_time speedup speedup_std
2 1 1 1 157.5217688 0.5311809049610176 1.0 0.003372111099358209
3 1 1 2 120.15237200000001 0.299200156243067 1.3110167213344732 0.004420894037456185
4 1 1 4 75.69762635 0.5769735213369493 2.0809340582447473 0.007017140834839633
5 2 2 8 52.110670325 0.940887445857671 3.022831366735062 0.010193323203255446
6 4 4 16 28.215493675000005 0.5598524922936691 5.582811012077745 0.018825858979446598
7 8 8 32 12.422501156249998 0.31940690526235005 12.680358554102268 0.04275957782413007
8 16 16 64 7.70770194375 0.04068644910316383 20.43693048194875 0.06891560011499148

View File

@ -27,6 +27,7 @@ if __name__ == "__main__":
scaling_spec = {
"label": lambda job: job["accounting"][0]["nodes"]["count"],
"nodes": lambda job: job["accounting"][0]["nodes"]["count"],
"tasks": lambda job: job["accounting"][0]["tasks"]["count"],
"mean_time": lambda job: job["means"]["TimeStep"],
"std_time": lambda job: job["stds"]["TimeStep"],
"speedup": lambda job: jobs[0]["means"]["TimeStep"] / job["means"]["TimeStep"],

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
import copy
import json
from pathlib import Path
# Total domain extent per axis. It stays fixed for every generated config;
# only the decomposition into blocks changes.
SIZE = [384, 384, 384]

# Base configuration that each generated file is a modified copy of.
template_path = Path(__file__).parent.parent / "templates" / "strong-gpu.json"
with template_path.open(encoding="utf8") as f:
    template = json.load(f)

# Block counts per axis. The product of each entry doubles from 1 up to 128.
configs = [
    [ 1,  1,  1],
    [ 1,  1,  2],
    [ 1,  2,  2],
    [ 2,  2,  2],
    [ 2,  2,  4],
    [ 2,  4,  4],
    [ 4,  4,  4],
    [ 4,  4,  8],
]

out_path = Path(__file__).parent.parent / "generated" / "config"
# The output directory is generated content and may not exist yet (e.g. on a
# fresh checkout); create it instead of failing on the first open().
out_path.mkdir(parents=True, exist_ok=True)

for c in configs:
    # Integer division below would silently shrink the domain if a block count
    # did not divide the extent evenly, so reject such a decomposition.
    if any(bs % bc != 0 for bc, bs in zip(c, SIZE)):
        raise ValueError(f"Block count {c} does not evenly divide domain size {SIZE}")

    # Deep copy so that the nested "Geometry" dict of the template is not
    # shared between (and mutated across) iterations.
    nc = copy.deepcopy(template)
    nc["Geometry"]["blockcount"] = c
    nc["Geometry"]["blocksize"] = [bs // bc for bc, bs in zip(c, SIZE)]

    # File name encodes the per-axis block counts, zero-padded to two digits.
    nc_out_path = out_path / f"strong-gpu-{c[0]:02}-{c[1]:02}-{c[2]:02}.json"
    print(f"Dumping {(c[0] * c[1] * c[2])} to {nc_out_path}")
    with nc_out_path.open("w", encoding="utf8") as f:
        json.dump(nc, f)

View File

@ -0,0 +1,63 @@
{
"common": {
"account": "cellsinsilico",
"partition": "gpus",
"extra_sbatch_line": "#SBATCH --gres=gpu:4",
"logfile_path": "/p/project/cellsinsilico/paulslustigebude/ma/experiments/eval/logs/%x-%A.%a",
"nastja_binary_path": "/p/project/cellsinsilico/paulslustigebude/nastja/build-cuda/nastja",
"config_path": "/p/project/cellsinsilico/paulslustigebude/ma/experiments/eval/generated/config/${SLURM_JOB_NAME}.json",
"output_dir_path": "/p/scratch/cellsinsilico/paul/nastja-out/${SLURM_JOB_NAME}-${SLURM_ARRAY_JOB_ID}.${SLURM_ARRAY_TASK_ID}"
},
"batches": [
{
"name": "strong-gpu-01-01-01",
"nodes": 1,
"tasks": 1,
"time": "00:15:00",
"extra_sbatch_line": "#SBATCH --gres=gpu:1"
},
{
"name": "strong-gpu-01-01-02",
"nodes": 1,
"tasks": 2,
"time": "00:15:00",
"extra_sbatch_line": "#SBATCH --gres=gpu:2"
},
{
"name": "strong-gpu-01-02-02",
"nodes": 1,
"tasks": 4,
"time": "00:15:00"
},
{
"name": "strong-gpu-02-02-02",
"nodes": 2,
"tasks": 8,
"time": "00:15:00"
},
{
"name": "strong-gpu-02-02-04",
"nodes": 4,
"tasks": 16,
"time": "00:15:00"
},
{
"name": "strong-gpu-02-04-04",
"nodes": 8,
"tasks": 32,
"time": "00:15:00"
},
{
"name": "strong-gpu-04-04-04",
"nodes": 16,
"tasks": 64,
"time": "00:15:00"
},
{
"name": "strong-gpu-04-04-08",
"nodes": 32,
"tasks": 128,
"time": "00:15:00"
}
]
}

View File

@ -0,0 +1,263 @@
{
"Application": "Cells",
"CellsInSilico": {
"2D": false,
"adhesion": {
"matrix": [
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 450.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 450.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0]
],
"polarityenabled": false
},
"centerofmass": {
"steps": 1
},
"cleaner": {
"killdistance": 0,
"steps": 100
},
"contactinhibition": {
"enabled": false
},
"division": {
"condition": [
"",
"",
"",
"",
"",
"",
"",
"",
"",
"( volume >= 0.9 * volume0 ) & ( rnd() <= 0.00001 ) & generation < 1"
],
"enabled": true,
"halveSignals": false
},
"dynamicecm": {
"alpha": 2.0,
"beta": 0.5,
"c": 4.0,
"deltat": 0.10000000149011612,
"ecmCellID": 0,
"enabled": true,
"eta": 0.25,
"k0": 0.10000000149011612,
"k1": 0.10000000149011612,
"lambda": 10.0,
"phi": 1.0,
"pushSteps": 10,
"pushWeight": 0.5,
"stepsPerMcs": 100
},
"ecmdegradation": {
"enabled": false
},
"energyfunctions": [
"Volume00",
"Surface01",
"Motility00",
"Adhesion01",
"DynamicECM00"
],
"liquid": 6,
"logcellproperties": {
"enabled": false
},
"orientation": {
"enabled": true,
"motility": "persistentRandomWalk",
"motilityamount": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"numRandomNumbers": 5,
"persistenceMagnitude": 0.0,
"persistentDecay": 0.8,
"recalculationtime": 200
},
"polarity": {
"enabled": false
},
"signaling": {
"constant": false,
"enabled": false
},
"surface": {
"default": {
"storage": "const",
"value": 400.0
},
"lambda": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
5.625,
5.625,
1.0
],
"sizechange": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
-0.05,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
]
},
"temperature": 50.0,
"visitor": {
"checkerboard": "01",
"stepwidth": 10
},
"volume": {
"default": {
"storage": "const",
"value": 500.0
},
"lambda": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
7.5,
7.5,
7.5
],
"sizechange": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
-0.05,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
]
}
},
"DefineFunctions": [
"r_angle()=360*rnd()",
"r_size()=400*rnd()"
],
"Filling": {
"cells": [
{
"box": [
[
0,
0,
0
],
[
384,
384,
384
]
],
"celltype": 0,
"component": 0,
"pattern": "const",
"seed": 0,
"shape": "cube",
"value": 0
},
{
"box": [
[117, 117, 117],
[267, 267, 267]
],
"celltype": 9,
"center": [192, 192, 192],
"component": 0,
"count": 5500,
"pattern": "voronoi",
"radius": 75,
"seed": 758960,
"shape": "sphere",
"value": 8
}
],
"initialoutput": false,
"randomseed": 758959
},
"Geometry": {
"blockcount": [
4,
4,
3
],
"blockdefault": "fill",
"blocksize": [
96,
96,
128
],
"blocktype": [
[
[
1
]
]
]
},
"Settings": {
"deltat": 1.0,
"deltax": 1.0,
"handleFPE": "signal",
"logger": {
"group": 0,
"steps": 100
},
"randomseed": 42,
"statusoutput": 1,
"timestepguard": 1,
"timesteps": 5,
"cuda": {
"subblocks": {
"blockDim": [8, 8, 8]
}
}
}
}