Compare commits
No commits in common. "4661646b4f4d297aafdc1403617a44f41009007c" and "9d7b0028a603a2f826ee57d23c2317756e6dc383" have entirely different histories.
4661646b4f
...
9d7b0028a6
2
experiments/.gitignore
vendored
2
experiments/.gitignore
vendored
@ -5,5 +5,3 @@ __pycache__
|
|||||||
generated/*
|
generated/*
|
||||||
batch/measurements/strong/*
|
batch/measurements/strong/*
|
||||||
configs/measurements/strong/*
|
configs/measurements/strong/*
|
||||||
*.dat
|
|
||||||
*.db
|
|
||||||
|
@ -4,13 +4,12 @@
|
|||||||
},
|
},
|
||||||
"Application": "Cells",
|
"Application": "Cells",
|
||||||
"Geometry": {
|
"Geometry": {
|
||||||
"blocksize": [180, 180, 180],
|
"blocksize": [90, 90, 60],
|
||||||
"blockcount": [1, 1, 1]
|
"blockcount": [2, 2, 3]
|
||||||
},
|
},
|
||||||
"Settings": {
|
"Settings": {
|
||||||
"timesteps": 100,
|
"timesteps": 100,
|
||||||
"randomseed": 42,
|
"randomseed": 42
|
||||||
"statusoutput": 1
|
|
||||||
},
|
},
|
||||||
"Filling": {
|
"Filling": {
|
||||||
"cells": [
|
"cells": [
|
||||||
@ -96,14 +95,14 @@
|
|||||||
"writer": "ParallelVtkImage",
|
"writer": "ParallelVtkImage",
|
||||||
"outputtype": "UInt32",
|
"outputtype": "UInt32",
|
||||||
"field": "cells",
|
"field": "cells",
|
||||||
"steps": 10
|
"steps": 1
|
||||||
},
|
},
|
||||||
"ParallelVTK_Displacement": {
|
"ParallelVTK_Displacement": {
|
||||||
"writer": "ParallelVtkImage",
|
"writer": "ParallelVtkImage",
|
||||||
"outputtype": "Float32",
|
"outputtype": "Float32",
|
||||||
"field": "dynamicecm",
|
"field": "dynamicecm",
|
||||||
"components": [0, 1, 2],
|
"components": [0, 1, 2],
|
||||||
"steps": 10
|
"steps": 1
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"WriteActions": ["ParallelVTK_Cells", "ParallelVTK_Displacement"]
|
"WriteActions": ["ParallelVTK_Cells", "ParallelVTK_Displacement"]
|
||||||
|
@ -1,51 +1,29 @@
|
|||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
from toolkit import Configuration
|
@dataclass
|
||||||
|
class Configuration:
|
||||||
|
nodes: int
|
||||||
|
tasks: int
|
||||||
|
gpus_per_node: int
|
||||||
|
blockcount: Tuple[int, int, int]
|
||||||
|
blocksize: Tuple[int, int, int]
|
||||||
|
|
||||||
SIZE_X = 400
|
def get_domain_size(self) -> int:
|
||||||
SIZE_Y = 400
|
return self.blockcount[0] * self.blocksize[0] * self.blockcount[1] * self.blocksize[1] * self.blockcount[2] * self.blocksize[2]
|
||||||
SIZE_Z = 400
|
|
||||||
|
|
||||||
def make_config(gpus: int, blockcount: Tuple[int, int, int]) -> Configuration:
|
|
||||||
assert gpus % 4 == 0
|
|
||||||
assert SIZE_X % blockcount[0] == 0
|
|
||||||
assert SIZE_Y % blockcount[1] == 0
|
|
||||||
assert SIZE_Z % blockcount[2] == 0
|
|
||||||
assert blockcount[0] * blockcount[1] * blockcount[2] == gpus
|
|
||||||
|
|
||||||
return Configuration(
|
|
||||||
gpus // 4,
|
|
||||||
gpus,
|
|
||||||
4,
|
|
||||||
blockcount,
|
|
||||||
(SIZE_X // blockcount[0], SIZE_Y // blockcount[1], SIZE_Z // blockcount[2])
|
|
||||||
)
|
|
||||||
|
|
||||||
configurations = [
|
configurations = [
|
||||||
Configuration(1, 1, 1, (1, 1, 1), (400, 400, 400)),
|
Configuration( 1, 12, 1, ( 1, 4, 3), (400, 100, 134)),
|
||||||
Configuration(1, 2, 2, (1, 1, 2), (400, 400, 200)),
|
Configuration( 1, 24, 2, ( 2, 4, 3), (200, 100, 134)),
|
||||||
Configuration(1, 2, 2, (1, 2, 1), (400, 200, 400)),
|
Configuration( 1, 48, 4, ( 4, 4, 3), (100, 100, 134)),
|
||||||
Configuration(1, 2, 2, (2, 1, 1), (200, 400, 400)),
|
Configuration( 2, 96, 4, ( 4, 4, 6), (100, 100, 67)),
|
||||||
make_config(4, (1, 1, 4)),
|
Configuration( 4, 192, 4, ( 4, 8, 6), (100, 50, 67)),
|
||||||
make_config(4, (1, 4, 1)),
|
Configuration( 8, 384, 4, ( 8, 8, 6), ( 50, 50, 67)),
|
||||||
make_config(4, (4, 1, 1)),
|
Configuration(16, 768, 4, ( 8, 16, 6), ( 50, 25, 67)),
|
||||||
make_config(4, (1, 2, 2)),
|
Configuration(32, 1536, 4, (16, 16, 6), ( 25, 25, 67))
|
||||||
make_config(4, (2, 1, 2)),
|
|
||||||
make_config(4, (2, 2, 1)),
|
|
||||||
make_config(8, (2, 2, 2)),
|
|
||||||
make_config(8, (1, 2, 4)),
|
|
||||||
make_config(16, (1, 4, 4)),
|
|
||||||
make_config(16, (2, 2, 4)),
|
|
||||||
make_config(32, (2, 4, 4)),
|
|
||||||
make_config(64, (4, 4, 4)),
|
|
||||||
make_config(128, (4, 4, 8)),
|
|
||||||
make_config(256, (4, 8, 8)),
|
|
||||||
make_config(512, (8, 8, 8)),
|
|
||||||
make_config(1024, (8, 8, 16)),
|
|
||||||
make_config(2048, (8, 16, 16))
|
|
||||||
]
|
]
|
||||||
|
|
||||||
with open("templates/spheroid.json") as template_file:
|
with open("templates/spheroid.json") as template_file:
|
||||||
@ -60,7 +38,10 @@ for c in configurations:
|
|||||||
nastja_config["Geometry"]["blockcount"] = c.blockcount
|
nastja_config["Geometry"]["blockcount"] = c.blockcount
|
||||||
nastja_config["Geometry"]["blocksize"] = c.blocksize
|
nastja_config["Geometry"]["blocksize"] = c.blocksize
|
||||||
|
|
||||||
label = c.get_label()
|
label = f"{c.nodes:02}"
|
||||||
|
|
||||||
|
if c.gpus_per_node < 4:
|
||||||
|
label += f"g{c.gpus_per_node}"
|
||||||
|
|
||||||
with open(f"configs/measurements/strong/spheroid_{label}.json", "w") as config_file:
|
with open(f"configs/measurements/strong/spheroid_{label}.json", "w") as config_file:
|
||||||
json.dump(nastja_config, config_file, indent=2)
|
json.dump(nastja_config, config_file, indent=2)
|
||||||
@ -74,7 +55,7 @@ for c in configurations:
|
|||||||
#SBATCH --ntasks={c.tasks}
|
#SBATCH --ntasks={c.tasks}
|
||||||
# Counted per node
|
# Counted per node
|
||||||
#SBATCH --gres=gpu:{c.gpus_per_node}
|
#SBATCH --gres=gpu:{c.gpus_per_node}
|
||||||
#SBATCH --time=00:30:00
|
#SBATCH --time=06:00:00
|
||||||
#SBATCH --output=logs/strong-{label}-%A_%a.log
|
#SBATCH --output=logs/strong-{label}-%A_%a.log
|
||||||
#SBATCH --error=logs/strong-{label}-%A_%a.log
|
#SBATCH --error=logs/strong-{label}-%A_%a.log
|
||||||
#SBATCH --array=1-5
|
#SBATCH --array=1-5
|
||||||
@ -82,13 +63,13 @@ for c in configurations:
|
|||||||
SOURCE_DIR=/p/project/cellsinsilico/paulslustigebude
|
SOURCE_DIR=/p/project/cellsinsilico/paulslustigebude
|
||||||
OUTPUT_DIR="/p/scratch/cellsinsilico/paul/nastja-out/strong-{label}-${{SLURM_ARRAY_TASK_ID}}"
|
OUTPUT_DIR="/p/scratch/cellsinsilico/paul/nastja-out/strong-{label}-${{SLURM_ARRAY_TASK_ID}}"
|
||||||
|
|
||||||
echo "outdir is ${{OUTPUT_DIR}}"
|
echo "${{OUTPUT_DIR}}"
|
||||||
|
|
||||||
mkdir -p "${{OUTPUT_DIR}}"
|
mkdir -p "${{OUTPUT_DIR}}"
|
||||||
source "${{SOURCE_DIR}}/activate-nastja-modules"
|
source "${{SOURCE_DIR}}/activate-nastja-modules"
|
||||||
|
|
||||||
srun --unbuffered "${{SOURCE_DIR}}/nastja/build-cuda/nastja" \\
|
srun --unbuffered "${{SOURCE_DIR}}/nastja/build-cuda/nastja" \\
|
||||||
-c "${{SOURCE_DIR}}/ma/experiments/configs/measurements/strong/spheroid_{label}.json" \\
|
-c "${{SOURCE_DIR}}/ma/experiments/configs/measurements/strong/spheroid_{c.nodes:02}.json" \\
|
||||||
-o "${{OUTPUT_DIR}}"
|
-o "${{OUTPUT_DIR}}"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -1,16 +0,0 @@
|
|||||||
from dataclasses import dataclass
|
|
||||||
from typing import Tuple
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Configuration:
|
|
||||||
nodes: int
|
|
||||||
tasks: int
|
|
||||||
gpus_per_node: int
|
|
||||||
blockcount: Tuple[int, int, int]
|
|
||||||
blocksize: Tuple[int, int, int]
|
|
||||||
|
|
||||||
def get_domain_size(self) -> int:
|
|
||||||
return self.blockcount[0] * self.blocksize[0] * self.blockcount[1] * self.blocksize[1] * self.blockcount[2] * self.blocksize[2]
|
|
||||||
|
|
||||||
def get_label(self) -> str:
|
|
||||||
return f"t{self.tasks:04}n{self.nodes:03}g{self.gpus_per_node}x{self.blockcount[0]}y{self.blockcount[1]}z{self.blockcount[2]}"
|
|
@ -1,132 +0,0 @@
|
|||||||
import click
|
|
||||||
import csv
|
|
||||||
import re
|
|
||||||
import sqlite3
|
|
||||||
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Tuple
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class TimingData:
|
|
||||||
tasks: int
|
|
||||||
nodes: int
|
|
||||||
gpus_per_node: int
|
|
||||||
blockcount: Tuple[int, int, int]
|
|
||||||
array_index: int
|
|
||||||
|
|
||||||
# Given in seconds
|
|
||||||
timings_by_task: [float]
|
|
||||||
|
|
||||||
RUN_PATTERN = re.compile(r".*t([0-9]+)n([0-9]+)g([0-9]+)x([0-9]+)y([0-9]+)z([0-9]+)-([0-9]+)")
|
|
||||||
TIMING_PATTERN = re.compile(r"timing-([0-9]+)\.dat")
|
|
||||||
TIMING_ROW_PATTERN = re.compile(r"([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+).*")
|
|
||||||
|
|
||||||
def get_timing(timing_path: Path, action: str) -> float:
|
|
||||||
with timing_path.open(encoding="utf8") as timing_file:
|
|
||||||
for line in timing_file:
|
|
||||||
m = TIMING_ROW_PATTERN.match(line)
|
|
||||||
if not m:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if m.group(1) == action:
|
|
||||||
return float(m.group(5))
|
|
||||||
|
|
||||||
raise f"Could not find action '{action}' in {timing_path}"
|
|
||||||
|
|
||||||
def get_timings(d: Path, action: str) -> [float]:
|
|
||||||
timings = []
|
|
||||||
for timing_path in d.iterdir():
|
|
||||||
i = int(TIMING_PATTERN.match(timing_path.name).group(1))
|
|
||||||
timings.append((i, get_timing(timing_path, action) / 1_000_000))
|
|
||||||
|
|
||||||
return [timing for i, timing in sorted(timings, key=lambda t: t[0])]
|
|
||||||
|
|
||||||
def get_outdir_timing_data(d: Path, action: str) -> TimingData:
|
|
||||||
match_results = RUN_PATTERN.match(d.name)
|
|
||||||
tasks = int(match_results.group(1))
|
|
||||||
nodes = int(match_results.group(2))
|
|
||||||
gpus_per_node = int(match_results.group(3))
|
|
||||||
blockcount = (
|
|
||||||
int(match_results.group(4)),
|
|
||||||
int(match_results.group(5)),
|
|
||||||
int(match_results.group(6))
|
|
||||||
)
|
|
||||||
array_index = int(match_results.group(7))
|
|
||||||
timings_by_task = get_timings(d / "timing", action)
|
|
||||||
|
|
||||||
return TimingData(
|
|
||||||
tasks,
|
|
||||||
nodes,
|
|
||||||
gpus_per_node,
|
|
||||||
blockcount,
|
|
||||||
array_index,
|
|
||||||
timings_by_task
|
|
||||||
)
|
|
||||||
|
|
||||||
@click.group()
|
|
||||||
def timing():
|
|
||||||
pass
|
|
||||||
|
|
||||||
@timing.command()
|
|
||||||
@click.argument(
|
|
||||||
"directories",
|
|
||||||
type=click.Path(exists=True, file_okay=False, path_type=Path),
|
|
||||||
nargs=-1
|
|
||||||
)
|
|
||||||
@click.option("--db", default="timings.db", help="Path of sqlite database file")
|
|
||||||
def make_timing_db(directories, db):
|
|
||||||
"""
|
|
||||||
Collect NAStJA timing data from all passed directories and save them into a SQLite database.
|
|
||||||
|
|
||||||
Drops the timings table from the given database and creates a new timings table.
|
|
||||||
|
|
||||||
@param db asjdas
|
|
||||||
"""
|
|
||||||
db = sqlite3.connect(db)
|
|
||||||
c = db.cursor()
|
|
||||||
c.execute("drop table if exists timings")
|
|
||||||
c.execute("create table timings (tasks, blockcount_x, blockcount_y, blockcount_z, array_index, averagetime)")
|
|
||||||
|
|
||||||
print("Collecting timing info...")
|
|
||||||
for d in directories:
|
|
||||||
print(d)
|
|
||||||
t = get_outdir_timing_data(d, "Sweep:DynamicECM")
|
|
||||||
c.executemany(
|
|
||||||
"insert into timings values (?, ?, ?, ?, ?, ?)",
|
|
||||||
[
|
|
||||||
(t.tasks, t.blockcount[0], t.blockcount[1], t.blockcount[2], t.array_index, sum(t.timings_by_task) / len(t.timings_by_task))
|
|
||||||
]
|
|
||||||
)
|
|
||||||
print("Done, committing into DB...")
|
|
||||||
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
print("Done!")
|
|
||||||
|
|
||||||
@timing.command()
|
|
||||||
@click.option("--db", default="timings.db", help="Path of sqlite database file")
|
|
||||||
@click.option("--time/--no-time", default=False, help="Print average time of best run instead of speedup")
|
|
||||||
def strong_dat(db, time):
|
|
||||||
db = sqlite3.connect(db)
|
|
||||||
c = db.cursor()
|
|
||||||
res = c.execute("""
|
|
||||||
select tasks, min(avg)
|
|
||||||
from (
|
|
||||||
select tasks, blockcount_x, blockcount_y, blockcount_z, sum(averagetime) / count(*) as avg
|
|
||||||
from timings group by tasks, blockcount_x, blockcount_y, blockcount_z
|
|
||||||
) group by tasks order by tasks asc;
|
|
||||||
""")
|
|
||||||
values = res.fetchall()
|
|
||||||
|
|
||||||
if not time:
|
|
||||||
print("gpus\tspeedup")
|
|
||||||
for tasks, time in values:
|
|
||||||
print(f"{tasks}\t{values[0][1] / time}")
|
|
||||||
else:
|
|
||||||
print("gpus\ttime")
|
|
||||||
for tasks, time in values:
|
|
||||||
print(f"{tasks}\t{time}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
timing()
|
|
@ -82,15 +82,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"Geometry": {
|
"Geometry": {
|
||||||
"blockcount": null,
|
"blockcount": [4, 4, 3],
|
||||||
"blocksize": null
|
"blocksize": [100, 100, 134]
|
||||||
},
|
},
|
||||||
"Settings": {
|
"Settings": {
|
||||||
"randomseed": 0,
|
"randomseed": 0,
|
||||||
"timesteps": 20,
|
"timesteps": 100
|
||||||
"statusoutput": 1
|
|
||||||
},
|
},
|
||||||
"WriteActions": [],
|
"WriteActions": ["CellInfo"],
|
||||||
"Writers": {
|
"Writers": {
|
||||||
"CellInfo": {
|
"CellInfo": {
|
||||||
"field": "",
|
"field": "",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user