Compare commits

3 Commits

9d7b0028a6...4661646b4f

| Author | SHA1 | Date |
|---|---|---|
| | 4661646b4f | |
| | f563cbf9e4 | |
| | 6dbbf45043 | |

experiments/.gitignore (vendored, 2 changes)
@@ -5,3 +5,5 @@ __pycache__
 generated/*
 batch/measurements/strong/*
 configs/measurements/strong/*
+*.dat
+*.db
@@ -4,12 +4,13 @@
   },
   "Application": "Cells",
   "Geometry": {
-    "blocksize": [90, 90, 60],
-    "blockcount": [2, 2, 3]
+    "blocksize": [180, 180, 180],
+    "blockcount": [1, 1, 1]
   },
   "Settings": {
     "timesteps": 100,
-    "randomseed": 42
+    "randomseed": 42,
+    "statusoutput": 1
   },
   "Filling": {
     "cells": [
@@ -95,14 +96,14 @@
       "writer": "ParallelVtkImage",
       "outputtype": "UInt32",
       "field": "cells",
-      "steps": 1
+      "steps": 10
     },
     "ParallelVTK_Displacement": {
       "writer": "ParallelVtkImage",
       "outputtype": "Float32",
       "field": "dynamicecm",
       "components": [0, 1, 2],
-      "steps": 1
+      "steps": 10
     }
   },
   "WriteActions": ["ParallelVTK_Cells", "ParallelVTK_Displacement"]
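Note that both Geometry settings describe the same 180x180x180 domain; only the decomposition changes from 2x2x3 blocks of 90x90x60 to a single 180x180x180 block. A minimal sketch of that check, using the same per-axis product that Configuration.get_domain_size in the new toolkit.py (below) computes; domain_size here is a hypothetical standalone helper, not part of the commit:

def domain_size(blockcount, blocksize):
    # Same per-axis product as Configuration.get_domain_size in toolkit.py.
    return (blockcount[0] * blocksize[0]
            * blockcount[1] * blocksize[1]
            * blockcount[2] * blocksize[2])

# Old decomposition: 2x2x3 blocks of 90x90x60; new: one 180x180x180 block.
assert domain_size((2, 2, 3), (90, 90, 60)) == domain_size((1, 1, 1), (180, 180, 180)) == 5_832_000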
@@ -1,29 +1,51 @@
 import copy
 import json

-from dataclasses import dataclass
 from typing import Tuple

-@dataclass
-class Configuration:
-    nodes: int
-    tasks: int
-    gpus_per_node: int
-    blockcount: Tuple[int, int, int]
-    blocksize: Tuple[int, int, int]
+from toolkit import Configuration

-    def get_domain_size(self) -> int:
-        return self.blockcount[0] * self.blocksize[0] * self.blockcount[1] * self.blocksize[1] * self.blockcount[2] * self.blocksize[2]
+SIZE_X = 400
+SIZE_Y = 400
+SIZE_Z = 400

+def make_config(gpus: int, blockcount: Tuple[int, int, int]) -> Configuration:
+    assert gpus % 4 == 0
+    assert SIZE_X % blockcount[0] == 0
+    assert SIZE_Y % blockcount[1] == 0
+    assert SIZE_Z % blockcount[2] == 0
+    assert blockcount[0] * blockcount[1] * blockcount[2] == gpus
+
+    return Configuration(
+        gpus // 4,
+        gpus,
+        4,
+        blockcount,
+        (SIZE_X // blockcount[0], SIZE_Y // blockcount[1], SIZE_Z // blockcount[2])
+    )
+
 configurations = [
-    Configuration( 1,   12,   1, ( 1,  4, 3), (400, 100, 134)),
-    Configuration( 1,   24,   2, ( 2,  4, 3), (200, 100, 134)),
-    Configuration( 1,   48,   4, ( 4,  4, 3), (100, 100, 134)),
-    Configuration( 2,   96,   4, ( 4,  4, 6), (100, 100,  67)),
-    Configuration( 4,  192,   4, ( 4,  8, 6), (100,  50,  67)),
-    Configuration( 8,  384,   4, ( 8,  8, 6), ( 50,  50,  67)),
-    Configuration(16,  768,   4, ( 8, 16, 6), ( 50,  25,  67)),
-    Configuration(32, 1536,   4, (16, 16, 6), ( 25,  25,  67))
+    Configuration(1, 1, 1, (1, 1, 1), (400, 400, 400)),
+    Configuration(1, 2, 2, (1, 1, 2), (400, 400, 200)),
+    Configuration(1, 2, 2, (1, 2, 1), (400, 200, 400)),
+    Configuration(1, 2, 2, (2, 1, 1), (200, 400, 400)),
+    make_config(4, (1, 1, 4)),
+    make_config(4, (1, 4, 1)),
+    make_config(4, (4, 1, 1)),
+    make_config(4, (1, 2, 2)),
+    make_config(4, (2, 1, 2)),
+    make_config(4, (2, 2, 1)),
+    make_config(8, (2, 2, 2)),
+    make_config(8, (1, 2, 4)),
+    make_config(16, (1, 4, 4)),
+    make_config(16, (2, 2, 4)),
+    make_config(32, (2, 4, 4)),
+    make_config(64, (4, 4, 4)),
+    make_config(128, (4, 4, 8)),
+    make_config(256, (4, 8, 8)),
+    make_config(512, (8, 8, 8)),
+    make_config(1024, (8, 8, 16)),
+    make_config(2048, (8, 16, 16))
 ]

 with open("templates/spheroid.json") as template_file:
@@ -38,10 +60,7 @@ for c in configurations:
     nastja_config["Geometry"]["blockcount"] = c.blockcount
     nastja_config["Geometry"]["blocksize"] = c.blocksize

-    label = f"{c.nodes:02}"

-    if c.gpus_per_node < 4:
-        label += f"g{c.gpus_per_node}"
+    label = c.get_label()

     with open(f"configs/measurements/strong/spheroid_{label}.json", "w") as config_file:
         json.dump(nastja_config, config_file, indent=2)
@@ -55,7 +74,7 @@ for c in configurations:
 #SBATCH --ntasks={c.tasks}
 # Counted per node
 #SBATCH --gres=gpu:{c.gpus_per_node}
-#SBATCH --time=06:00:00
+#SBATCH --time=00:30:00
 #SBATCH --output=logs/strong-{label}-%A_%a.log
 #SBATCH --error=logs/strong-{label}-%A_%a.log
 #SBATCH --array=1-5
@@ -63,13 +82,13 @@ for c in configurations:
 SOURCE_DIR=/p/project/cellsinsilico/paulslustigebude
 OUTPUT_DIR="/p/scratch/cellsinsilico/paul/nastja-out/strong-{label}-${{SLURM_ARRAY_TASK_ID}}"

-echo "${{OUTPUT_DIR}}"
+echo "outdir is ${{OUTPUT_DIR}}"

 mkdir -p "${{OUTPUT_DIR}}"
 source "${{SOURCE_DIR}}/activate-nastja-modules"

 srun --unbuffered "${{SOURCE_DIR}}/nastja/build-cuda/nastja" \\
-  -c "${{SOURCE_DIR}}/ma/experiments/configs/measurements/strong/spheroid_{c.nodes:02}.json" \\
+  -c "${{SOURCE_DIR}}/ma/experiments/configs/measurements/strong/spheroid_{label}.json" \\
   -o "${{OUTPUT_DIR}}"
 """
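For a concrete reading of the new make_config helper: 4 GPUs means one node with 4 GPUs per node, and the fixed 400x400x400 domain is divided by the block count per axis. A small sketch of the configuration that make_config(4, (1, 2, 2)) would build, just replaying the integer arithmetic above with Configuration taken from the new toolkit.py:

from toolkit import Configuration  # added in this commit (see below)

# Equivalent to make_config(4, (1, 2, 2)) in the generator above.
c = Configuration(
    nodes=4 // 4,                               # 1 node
    tasks=4,                                    # one task per GPU
    gpus_per_node=4,
    blockcount=(1, 2, 2),
    blocksize=(400 // 1, 400 // 2, 400 // 2),   # (400, 200, 200)
)
assert c.get_domain_size() == 400 * 400 * 400   # the decomposition preserves the domain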
experiments/scripts/gen/toolkit.py (new file, 16 lines)
@@ -0,0 +1,16 @@
from dataclasses import dataclass
from typing import Tuple

@dataclass
class Configuration:
    nodes: int
    tasks: int
    gpus_per_node: int
    blockcount: Tuple[int, int, int]
    blocksize: Tuple[int, int, int]

    def get_domain_size(self) -> int:
        return self.blockcount[0] * self.blocksize[0] * self.blockcount[1] * self.blocksize[1] * self.blockcount[2] * self.blocksize[2]

    def get_label(self) -> str:
        return f"t{self.tasks:04}n{self.nodes:03}g{self.gpus_per_node}x{self.blockcount[0]}y{self.blockcount[1]}z{self.blockcount[2]}"
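The label produced by get_label is what links the generated config files, the SLURM output directories, and the parsing in timing.py. For the example configuration sketched above it looks like this:

from toolkit import Configuration

c = Configuration(1, 4, 4, (1, 2, 2), (400, 200, 200))
print(c.get_label())  # -> t0004n001g4x1y2z2
# Used by the generator as configs/measurements/strong/spheroid_t0004n001g4x1y2z2.json
# and as part of the strong-t0004n001g4x1y2z2-<array id> output directory name
# that timing.py's RUN_PATTERN parses back apart.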
experiments/scripts/timing.py (new file, 132 lines)
@@ -0,0 +1,132 @@
import click
import csv
import re
import sqlite3

from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

@dataclass
class TimingData:
    tasks: int
    nodes: int
    gpus_per_node: int
    blockcount: Tuple[int, int, int]
    array_index: int

    # Given in seconds
    timings_by_task: List[float]

RUN_PATTERN = re.compile(r".*t([0-9]+)n([0-9]+)g([0-9]+)x([0-9]+)y([0-9]+)z([0-9]+)-([0-9]+)")
TIMING_PATTERN = re.compile(r"timing-([0-9]+)\.dat")
TIMING_ROW_PATTERN = re.compile(r"([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+).*")

def get_timing(timing_path: Path, action: str) -> float:
    with timing_path.open(encoding="utf8") as timing_file:
        for line in timing_file:
            m = TIMING_ROW_PATTERN.match(line)
            if not m:
                continue

            if m.group(1) == action:
                return float(m.group(5))

    raise ValueError(f"Could not find action '{action}' in {timing_path}")

def get_timings(d: Path, action: str) -> List[float]:
    timings = []
    for timing_path in d.iterdir():
        i = int(TIMING_PATTERN.match(timing_path.name).group(1))
        timings.append((i, get_timing(timing_path, action) / 1_000_000))

    return [timing for i, timing in sorted(timings, key=lambda t: t[0])]

def get_outdir_timing_data(d: Path, action: str) -> TimingData:
    match_results = RUN_PATTERN.match(d.name)
    tasks = int(match_results.group(1))
    nodes = int(match_results.group(2))
    gpus_per_node = int(match_results.group(3))
    blockcount = (
        int(match_results.group(4)),
        int(match_results.group(5)),
        int(match_results.group(6))
    )
    array_index = int(match_results.group(7))
    timings_by_task = get_timings(d / "timing", action)

    return TimingData(
        tasks,
        nodes,
        gpus_per_node,
        blockcount,
        array_index,
        timings_by_task
    )

@click.group()
def timing():
    pass

@timing.command()
@click.argument(
    "directories",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    nargs=-1
)
@click.option("--db", default="timings.db", help="Path of sqlite database file")
def make_timing_db(directories, db):
    """
    Collect NAStJA timing data from all passed directories and save them into a SQLite database.

    Drops the timings table from the given database and creates a new timings table.
    """
    db = sqlite3.connect(db)
    c = db.cursor()
    c.execute("drop table if exists timings")
    c.execute("create table timings (tasks, blockcount_x, blockcount_y, blockcount_z, array_index, averagetime)")

    print("Collecting timing info...")
    for d in directories:
        print(d)
        t = get_outdir_timing_data(d, "Sweep:DynamicECM")
        c.executemany(
            "insert into timings values (?, ?, ?, ?, ?, ?)",
            [
                (t.tasks, t.blockcount[0], t.blockcount[1], t.blockcount[2], t.array_index, sum(t.timings_by_task) / len(t.timings_by_task))
            ]
        )
    print("Done, committing into DB...")

    db.commit()

    print("Done!")

@timing.command()
@click.option("--db", default="timings.db", help="Path of sqlite database file")
@click.option("--time/--no-time", default=False, help="Print average time of best run instead of speedup")
def strong_dat(db, time):
    db = sqlite3.connect(db)
    c = db.cursor()
    res = c.execute("""
    select tasks, min(avg)
    from (
        select tasks, blockcount_x, blockcount_y, blockcount_z, sum(averagetime) / count(*) as avg
        from timings group by tasks, blockcount_x, blockcount_y, blockcount_z
    ) group by tasks order by tasks asc;
    """)
    values = res.fetchall()

    if not time:
        print("gpus\tspeedup")
        for tasks, t in values:
            print(f"{tasks}\t{values[0][1] / t}")
    else:
        print("gpus\ttime")
        for tasks, t in values:
            print(f"{tasks}\t{t}")

if __name__ == "__main__":
    timing()
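The directories handed to make_timing_db are the output directories created by the batch scripts above (strong-{label}-{SLURM_ARRAY_TASK_ID}), so RUN_PATTERN can recover the run parameters from the directory name alone. A short sketch with a hypothetical directory name following that scheme:

import re

RUN_PATTERN = re.compile(r".*t([0-9]+)n([0-9]+)g([0-9]+)x([0-9]+)y([0-9]+)z([0-9]+)-([0-9]+)")

# Hypothetical run directory: label t0004n001g4x1y2z2, array task 3.
m = RUN_PATTERN.match("strong-t0004n001g4x1y2z2-3")
tasks, nodes, gpus_per_node = int(m.group(1)), int(m.group(2)), int(m.group(3))
blockcount = (int(m.group(4)), int(m.group(5)), int(m.group(6)))
array_index = int(m.group(7))
assert (tasks, nodes, gpus_per_node) == (4, 1, 4)
assert blockcount == (1, 2, 2) and array_index == 3

strong_dat then takes, for each task count, the minimum of the per-decomposition average times and reports speedup relative to the smallest task count.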
@@ -82,14 +82,15 @@
         }
     },
     "Geometry": {
-        "blockcount": [4, 4, 3],
-        "blocksize": [100, 100, 134]
+        "blockcount": null,
+        "blocksize": null
     },
     "Settings": {
         "randomseed": 0,
-        "timesteps": 100
+        "timesteps": 20,
+        "statusoutput": 1
     },
-    "WriteActions": ["CellInfo"],
+    "WriteActions": [],
     "Writers": {
         "CellInfo": {
             "field": "",
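The null Geometry values in the template are placeholders that the generator fills in per configuration before writing each spheroid_{label}.json. A minimal sketch of that flow, assuming the generator deep-copies the loaded template for each configuration (it imports copy, but the deepcopy call is not part of the hunks shown, and the variable name template is assumed):

import copy
import json

from toolkit import Configuration

with open("templates/spheroid.json") as template_file:
    template = json.load(template_file)

c = Configuration(1, 4, 4, (1, 2, 2), (400, 200, 200))

nastja_config = copy.deepcopy(template)      # assumed: keep the template itself untouched
nastja_config["Geometry"]["blockcount"] = c.blockcount
nastja_config["Geometry"]["blocksize"] = c.blocksize

with open(f"configs/measurements/strong/spheroid_{c.get_label()}.json", "w") as config_file:
    json.dump(nastja_config, config_file, indent=2)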