Compare commits
	
		
			3 Commits
		
	
	
		
			9d7b0028a6
			...
			4661646b4f
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 4661646b4f | |||
| f563cbf9e4 | |||
| 6dbbf45043 | 
							
								
								
									
										2
									
								
								experiments/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								experiments/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -5,3 +5,5 @@ __pycache__ | |||||||
| generated/* | generated/* | ||||||
| batch/measurements/strong/* | batch/measurements/strong/* | ||||||
| configs/measurements/strong/* | configs/measurements/strong/* | ||||||
|  | *.dat | ||||||
|  | *.db | ||||||
|  | |||||||
| @ -4,12 +4,13 @@ | |||||||
|   }, |   }, | ||||||
|   "Application": "Cells", |   "Application": "Cells", | ||||||
|   "Geometry": { |   "Geometry": { | ||||||
|     "blocksize": [90, 90, 60], |     "blocksize": [180, 180, 180], | ||||||
|     "blockcount": [2, 2, 3] |     "blockcount": [1, 1, 1] | ||||||
|   }, |   }, | ||||||
|   "Settings": { |   "Settings": { | ||||||
|     "timesteps": 100, |     "timesteps": 100, | ||||||
|     "randomseed": 42 |     "randomseed": 42, | ||||||
|  |     "statusoutput": 1 | ||||||
|   }, |   }, | ||||||
|   "Filling": { |   "Filling": { | ||||||
|     "cells": [ |     "cells": [ | ||||||
| @ -95,14 +96,14 @@ | |||||||
|       "writer": "ParallelVtkImage", |       "writer": "ParallelVtkImage", | ||||||
|       "outputtype": "UInt32", |       "outputtype": "UInt32", | ||||||
|       "field": "cells", |       "field": "cells", | ||||||
|       "steps": 1 |       "steps": 10 | ||||||
|     }, |     }, | ||||||
|     "ParallelVTK_Displacement": { |     "ParallelVTK_Displacement": { | ||||||
|       "writer": "ParallelVtkImage", |       "writer": "ParallelVtkImage", | ||||||
|       "outputtype": "Float32", |       "outputtype": "Float32", | ||||||
|       "field": "dynamicecm", |       "field": "dynamicecm", | ||||||
|       "components": [0, 1, 2], |       "components": [0, 1, 2], | ||||||
|       "steps": 1 |       "steps": 10 | ||||||
|     } |     } | ||||||
|   }, |   }, | ||||||
|   "WriteActions": ["ParallelVTK_Cells", "ParallelVTK_Displacement"] |   "WriteActions": ["ParallelVTK_Cells", "ParallelVTK_Displacement"] | ||||||
|  | |||||||
| @ -1,29 +1,51 @@ | |||||||
| import copy | import copy | ||||||
| import json | import json | ||||||
| 
 | 
 | ||||||
| from dataclasses import dataclass |  | ||||||
| from typing import Tuple | from typing import Tuple | ||||||
| 
 | 
 | ||||||
| @dataclass | from toolkit import Configuration | ||||||
| class Configuration: |  | ||||||
|     nodes: int |  | ||||||
|     tasks: int |  | ||||||
|     gpus_per_node: int |  | ||||||
|     blockcount: Tuple[int, int, int] |  | ||||||
|     blocksize: Tuple[int, int, int] |  | ||||||
| 
 | 
 | ||||||
|     def get_domain_size(self) -> int: | SIZE_X = 400 | ||||||
|         return self.blockcount[0] * self.blocksize[0] * self.blockcount[1] * self.blocksize[1] * self.blockcount[2] * self.blocksize[2] | SIZE_Y = 400 | ||||||
|  | SIZE_Z = 400 | ||||||
|  | 
 | ||||||
def make_config(gpus: int, blockcount: Tuple[int, int, int]) -> Configuration:
    """
    Build the configuration for a run on ``gpus`` GPUs, using four GPUs per node.

    The SIZE_X x SIZE_Y x SIZE_Z domain is split evenly into ``blockcount`` blocks per
    axis. The number of blocks must equal ``gpus``, which is also used as the task count.

    Raises:
        ValueError: if ``gpus`` is not a multiple of 4, if a block count is not positive
            or does not divide the domain size of its axis, or if the total number of
            blocks differs from ``gpus``.
    """
    # Explicit checks instead of assert: assertions are removed when Python runs with -O.
    if gpus % 4 != 0:
        raise ValueError(f"gpus must be a multiple of 4, got {gpus}")

    domain = (SIZE_X, SIZE_Y, SIZE_Z)
    for axis, size, count in zip("xyz", domain, blockcount):
        if count <= 0 or size % count != 0:
            raise ValueError(
                f"blockcount {count} along {axis} does not evenly divide the domain size {size}"
            )

    if blockcount[0] * blockcount[1] * blockcount[2] != gpus:
        raise ValueError(f"blockcount {blockcount} does not multiply up to {gpus} GPUs")

    return Configuration(
        nodes=gpus // 4,
        tasks=gpus,
        gpus_per_node=4,
        blockcount=blockcount,
        blocksize=(SIZE_X // blockcount[0], SIZE_Y // blockcount[1], SIZE_Z // blockcount[2]),
    )
| 
 | 
 | ||||||
| configurations = [ | configurations = [ | ||||||
|     Configuration( 1,   12,   1, ( 1,  4, 3), (400, 100, 134)), |     Configuration(1, 1, 1, (1, 1, 1), (400, 400, 400)), | ||||||
|     Configuration( 1,   24,   2, ( 2,  4, 3), (200, 100, 134)), |     Configuration(1, 2, 2, (1, 1, 2), (400, 400, 200)), | ||||||
|     Configuration( 1,   48,   4, ( 4,  4, 3), (100, 100, 134)), |     Configuration(1, 2, 2, (1, 2, 1), (400, 200, 400)), | ||||||
|     Configuration( 2,   96,   4, ( 4,  4, 6), (100, 100,  67)), |     Configuration(1, 2, 2, (2, 1, 1), (200, 400, 400)), | ||||||
|     Configuration( 4,  192,   4, ( 4,  8, 6), (100,  50,  67)), |     make_config(4, (1, 1, 4)), | ||||||
|     Configuration( 8,  384,   4, ( 8,  8, 6), ( 50,  50,  67)), |     make_config(4, (1, 4, 1)), | ||||||
|     Configuration(16,  768,   4, ( 8, 16, 6), ( 50,  25,  67)), |     make_config(4, (4, 1, 1)), | ||||||
|     Configuration(32, 1536,   4, (16, 16, 6), ( 25,  25,  67)) |     make_config(4, (1, 2, 2)), | ||||||
|  |     make_config(4, (2, 1, 2)), | ||||||
|  |     make_config(4, (2, 2, 1)), | ||||||
|  |     make_config(8, (2, 2, 2)), | ||||||
|  |     make_config(8, (1, 2, 4)), | ||||||
|  |     make_config(16, (1, 4, 4)), | ||||||
|  |     make_config(16, (2, 2, 4)), | ||||||
|  |     make_config(32, (2, 4, 4)), | ||||||
|  |     make_config(64, (4, 4, 4)), | ||||||
|  |     make_config(128, (4, 4, 8)), | ||||||
|  |     make_config(256, (4, 8, 8)), | ||||||
|  |     make_config(512, (8, 8, 8)), | ||||||
|  |     make_config(1024, (8, 8, 16)), | ||||||
|  |     make_config(2048, (8, 16, 16)) | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| with open("templates/spheroid.json") as template_file: | with open("templates/spheroid.json") as template_file: | ||||||
| @ -38,10 +60,7 @@ for c in configurations: | |||||||
|     nastja_config["Geometry"]["blockcount"] = c.blockcount |     nastja_config["Geometry"]["blockcount"] = c.blockcount | ||||||
|     nastja_config["Geometry"]["blocksize"] = c.blocksize |     nastja_config["Geometry"]["blocksize"] = c.blocksize | ||||||
| 
 | 
 | ||||||
|     label = f"{c.nodes:02}" |     label = c.get_label() | ||||||
| 
 |  | ||||||
|     if c.gpus_per_node < 4: |  | ||||||
|         label += f"g{c.gpus_per_node}" |  | ||||||
| 
 | 
 | ||||||
|     with open(f"configs/measurements/strong/spheroid_{label}.json", "w") as config_file: |     with open(f"configs/measurements/strong/spheroid_{label}.json", "w") as config_file: | ||||||
|         json.dump(nastja_config, config_file, indent=2) |         json.dump(nastja_config, config_file, indent=2) | ||||||
| @ -55,7 +74,7 @@ for c in configurations: | |||||||
| #SBATCH --ntasks={c.tasks} | #SBATCH --ntasks={c.tasks} | ||||||
| # Counted per node | # Counted per node | ||||||
| #SBATCH --gres=gpu:{c.gpus_per_node} | #SBATCH --gres=gpu:{c.gpus_per_node} | ||||||
| #SBATCH --time=06:00:00 | #SBATCH --time=00:30:00 | ||||||
| #SBATCH --output=logs/strong-{label}-%A_%a.log | #SBATCH --output=logs/strong-{label}-%A_%a.log | ||||||
| #SBATCH --error=logs/strong-{label}-%A_%a.log | #SBATCH --error=logs/strong-{label}-%A_%a.log | ||||||
| #SBATCH --array=1-5 | #SBATCH --array=1-5 | ||||||
| @ -63,13 +82,13 @@ for c in configurations: | |||||||
| SOURCE_DIR=/p/project/cellsinsilico/paulslustigebude | SOURCE_DIR=/p/project/cellsinsilico/paulslustigebude | ||||||
| OUTPUT_DIR="/p/scratch/cellsinsilico/paul/nastja-out/strong-{label}-${{SLURM_ARRAY_TASK_ID}}" | OUTPUT_DIR="/p/scratch/cellsinsilico/paul/nastja-out/strong-{label}-${{SLURM_ARRAY_TASK_ID}}" | ||||||
| 
 | 
 | ||||||
| echo "${{OUTPUT_DIR}}" | echo "outdir is ${{OUTPUT_DIR}}" | ||||||
| 
 | 
 | ||||||
| mkdir -p "${{OUTPUT_DIR}}" | mkdir -p "${{OUTPUT_DIR}}" | ||||||
| source "${{SOURCE_DIR}}/activate-nastja-modules" | source "${{SOURCE_DIR}}/activate-nastja-modules" | ||||||
| 
 | 
 | ||||||
| srun --unbuffered "${{SOURCE_DIR}}/nastja/build-cuda/nastja" \\ | srun --unbuffered "${{SOURCE_DIR}}/nastja/build-cuda/nastja" \\ | ||||||
|   -c "${{SOURCE_DIR}}/ma/experiments/configs/measurements/strong/spheroid_{c.nodes:02}.json" \\ |   -c "${{SOURCE_DIR}}/ma/experiments/configs/measurements/strong/spheroid_{label}.json" \\ | ||||||
|   -o "${{OUTPUT_DIR}}" |   -o "${{OUTPUT_DIR}}" | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										16
									
								
								experiments/scripts/gen/toolkit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								experiments/scripts/gen/toolkit.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | |||||||
|  | from dataclasses import dataclass | ||||||
|  | from typing import Tuple | ||||||
|  | 
 | ||||||
@dataclass
class Configuration:
    """Job resources and block decomposition of one generated run."""

    # Values substituted into the generated batch script and simulation config.
    nodes: int
    tasks: int
    gpus_per_node: int
    # Number of blocks per axis (x, y, z).
    blockcount: Tuple[int, int, int]
    # Size of a single block per axis (x, y, z).
    blocksize: Tuple[int, int, int]

    def get_domain_size(self) -> int:
        """Return the product of blockcount * blocksize over the three axes."""
        total = 1
        for axis in range(3):
            total *= self.blockcount[axis] * self.blocksize[axis]
        return total

    def get_label(self) -> str:
        """Return the run label built from tasks, nodes, GPUs per node and block counts."""
        counts = self.blockcount
        resources = f"t{self.tasks:04}n{self.nodes:03}g{self.gpus_per_node}"
        layout = f"x{counts[0]}y{counts[1]}z{counts[2]}"
        return resources + layout
							
								
								
									
										132
									
								
								experiments/scripts/timing.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								experiments/scripts/timing.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,132 @@ | |||||||
import csv
import re
import sqlite3
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

import click
|  | 
 | ||||||
@dataclass
class TimingData:
    """Timing measurements of one run directory, with the run parameters parsed from its name."""

    tasks: int
    nodes: int
    gpus_per_node: int
    # Number of blocks per axis (x, y, z).
    blockcount: Tuple[int, int, int]
    # Trailing number of the run directory name.
    array_index: int

    # Given in seconds
    timings_by_task: List[float]
|  | 
 | ||||||
# Run directory name: tasks, nodes, GPUs per node, block count per axis and a trailing index.
RUN_PATTERN = re.compile(r".*t([0-9]+)n([0-9]+)g([0-9]+)x([0-9]+)y([0-9]+)z([0-9]+)-([0-9]+)")
# Timing file name; the captured number is used to order the results.
TIMING_PATTERN = re.compile(r"timing-([0-9]+)\.dat")
# First five space-separated columns of a timing row.
TIMING_ROW_PATTERN = re.compile(r"([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+).*")


def get_timing(timing_path: Path, action: str) -> float:
    """
    Return the fifth column of the first row in ``timing_path`` whose first column is ``action``.

    Raises:
        ValueError: if no row for ``action`` exists, or its fifth column is not a number.
    """
    with timing_path.open(encoding="utf8") as timing_file:
        for line in timing_file:
            row = TIMING_ROW_PATTERN.match(line)
            if row and row.group(1) == action:
                return float(row.group(5))

    # A bare string cannot be raised; wrap the message in a real exception.
    raise ValueError(f"Could not find action '{action}' in {timing_path}")


def get_timings(d: Path, action: str) -> List[float]:
    """
    Collect the ``action`` timing from every ``timing-<i>.dat`` file in ``d``, ordered by ``<i>``.

    Each value is divided by 1_000_000. NOTE(review): TimingData documents its timings as
    seconds, so the files presumably hold microseconds - confirm against the file format.
    Entries whose name does not match the timing file pattern are skipped.
    """
    timings = []
    for timing_path in d.iterdir():
        name_match = TIMING_PATTERN.match(timing_path.name)
        if name_match is None:
            # Not a timing file; ignore it instead of failing on a None match.
            continue
        index = int(name_match.group(1))
        timings.append((index, get_timing(timing_path, action) / 1_000_000))

    timings.sort(key=lambda entry: entry[0])
    return [timing for _, timing in timings]
|  | 
 | ||||||
def get_outdir_timing_data(d: Path, action: str) -> TimingData:
    """
    Build the TimingData of one run output directory.

    The run parameters are parsed from the directory name using RUN_PATTERN; the timings of
    ``action`` are read from the ``timing`` subdirectory.

    Raises:
        ValueError: if the directory name does not match RUN_PATTERN.
    """
    run = RUN_PATTERN.match(d.name)
    if run is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"Directory name '{d.name}' does not match the run directory pattern")

    tasks, nodes, gpus_per_node, blocks_x, blocks_y, blocks_z, array_index = (
        int(group) for group in run.groups()
    )
    timings_by_task = get_timings(d / "timing", action)

    return TimingData(
        tasks,
        nodes,
        gpus_per_node,
        (blocks_x, blocks_y, blocks_z),
        array_index,
        timings_by_task
    )
|  | 
 | ||||||
# Root command group; the subcommands in this file attach to it via @timing.command().
@click.group()
def timing():
    pass
|  | 
 | ||||||
@timing.command()
@click.argument(
    "directories",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    nargs=-1
)
@click.option("--db", default="timings.db", help="Path of sqlite database file")
def make_timing_db(directories, db):
    """
    Collect NAStJA timing data from all passed directories and save them into a SQLite database.

    Drops the timings table from the given database and creates a new timings table.
    Each directory contributes one row holding its "Sweep:DynamicECM" timing averaged over all tasks.
    """
    connection = sqlite3.connect(db)
    try:
        cursor = connection.cursor()
        cursor.execute("drop table if exists timings")
        cursor.execute("create table timings (tasks, blockcount_x, blockcount_y, blockcount_z, array_index, averagetime)")

        print("Collecting timing info...")
        for d in directories:
            print(d)
            t = get_outdir_timing_data(d, "Sweep:DynamicECM")
            if not t.timings_by_task:
                # Without this check the average below fails with a ZeroDivisionError.
                raise click.ClickException(f"No timings found in {d}")

            average = sum(t.timings_by_task) / len(t.timings_by_task)
            cursor.execute(
                "insert into timings values (?, ?, ?, ?, ?, ?)",
                (t.tasks, t.blockcount[0], t.blockcount[1], t.blockcount[2], t.array_index, average)
            )
        print("Done, committing into DB...")

        connection.commit()
    finally:
        # Always release the database file, including when a directory cannot be parsed.
        connection.close()

    print("Done!")
|  | 
 | ||||||
@timing.command()
@click.option("--db", default="timings.db", help="Path of sqlite database file")
@click.option("--time/--no-time", default=False, help="Print average time of best run instead of speedup")
def strong_dat(db, time):
    """
    Print a tab-separated strong scaling table from the timings table of the database.

    For every task count the block layout with the lowest mean time is used. By default the
    speedup relative to the smallest task count is printed; --time prints the mean time instead.
    """
    connection = sqlite3.connect(db)
    try:
        values = connection.execute("""
    select tasks, min(avg)
    from (
        select tasks, blockcount_x, blockcount_y, blockcount_z, sum(averagetime) / count(*) as avg
        from timings group by tasks, blockcount_x, blockcount_y, blockcount_z
    ) group by tasks order by tasks asc;
    """).fetchall()
    finally:
        connection.close()

    # The loop variables must not reuse the name of the --time flag.
    if time:
        print("gpus\ttime")
        for tasks, best_time in values:
            print(f"{tasks}\t{best_time}")
        return

    print("gpus\tspeedup")
    if not values:
        return

    # Rows are ordered by task count, so the first row is the baseline.
    baseline = values[0][1]
    for tasks, best_time in values:
        print(f"{tasks}\t{baseline / best_time}")
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     timing() | ||||||
| @ -82,14 +82,15 @@ | |||||||
|         } |         } | ||||||
|     }, |     }, | ||||||
|     "Geometry": { |     "Geometry": { | ||||||
|         "blockcount": [4, 4, 3], |         "blockcount": null, | ||||||
|         "blocksize": [100, 100, 134] |         "blocksize": null | ||||||
|     }, |     }, | ||||||
|     "Settings": { |     "Settings": { | ||||||
|         "randomseed": 0, |         "randomseed": 0, | ||||||
|         "timesteps": 100 |         "timesteps": 20, | ||||||
|  |         "statusoutput": 1 | ||||||
|     }, |     }, | ||||||
|     "WriteActions": ["CellInfo"], |     "WriteActions": [], | ||||||
|     "Writers": { |     "Writers": { | ||||||
|         "CellInfo": { |         "CellInfo": { | ||||||
|             "field": "", |             "field": "", | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user