"""Collect per-task timing files of simulation runs into SQLite and print scaling tables."""

import csv
import re
import sqlite3
from contextlib import closing
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

import click


@dataclass
class TimingData:
    """Timing results of one run, identified by the fields encoded in its directory name."""

    # Fields below are parsed from the run directory name (see RUN_PATTERN).
    tasks: int
    nodes: int
    gpus_per_node: int
    blockcount: Tuple[int, int, int]
    array_index: int
    # Given in seconds, ordered by the numeric index of the timing file.
    timings_by_task: List[float]


# Run directory name: ...t<tasks>n<nodes>g<gpus_per_node>x<bx>y<by>z<bz>-<array_index>
RUN_PATTERN = re.compile(r".*t([0-9]+)n([0-9]+)g([0-9]+)x([0-9]+)y([0-9]+)z([0-9]+)-([0-9]+)")
# Timing file name: timing-<index>.dat
TIMING_PATTERN = re.compile(r"timing-([0-9]+)\.dat")
# A timing row: at least five space-separated columns; columns 1 and 5 are used.
TIMING_ROW_PATTERN = re.compile(r"([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+).*")


def get_timing(timing_path: Path, action: str) -> float:
    """Return the fifth column of the first row whose first column equals *action*.

    Args:
        timing_path: Timing file to scan line by line (read as UTF-8).
        action: Exact value expected in the first column of the wanted row.

    Returns:
        The fifth column of the matching row, converted to ``float``.

    Raises:
        ValueError: If no row for *action* exists in the file, or if the
            fifth column of the matching row is not a valid float.
    """
    with timing_path.open(encoding="utf8") as timing_file:
        for line in timing_file:
            m = TIMING_ROW_PATTERN.match(line)
            if m is not None and m.group(1) == action:
                return float(m.group(5))
    # A bare string cannot be raised in Python 3; use a real exception type.
    raise ValueError(f"Could not find action '{action}' in {timing_path}")


def get_timings(d: Path, action: str) -> List[float]:
    """Read the timing of *action* from every ``timing-<index>.dat`` file in *d*.

    Entries of *d* whose name does not match ``TIMING_PATTERN`` are ignored.

    Args:
        d: Directory containing the timing files.
        action: Action name looked up in each file via :func:`get_timing`.

    Returns:
        One value per timing file, sorted by the numeric file index. Each raw
        value is divided by 1,000,000 (presumably microseconds to seconds,
        matching the "seconds" note on ``TimingData`` -- confirm against the
        file format).

    Raises:
        ValueError: If a timing file has no row for *action*.
    """
    indexed = []
    for timing_path in d.iterdir():
        m = TIMING_PATTERN.match(timing_path.name)
        if m is None:
            # Not a timing file (e.g. a stray log); skip instead of crashing.
            continue
        indexed.append((int(m.group(1)), get_timing(timing_path, action) / 1_000_000))
    indexed.sort(key=lambda entry: entry[0])
    return [seconds for _, seconds in indexed]


def get_outdir_timing_data(d: Path, action: str) -> TimingData:
    """Build a :class:`TimingData` from a run output directory.

    The run parameters are parsed from the directory name with ``RUN_PATTERN``
    and the timings are read from the ``timing`` subdirectory.

    Args:
        d: Run output directory.
        action: Action name whose timing is extracted from each timing file.

    Returns:
        The parsed run parameters together with the per-file timings.

    Raises:
        ValueError: If the directory name does not match ``RUN_PATTERN`` or a
            timing file has no row for *action*.
    """
    m = RUN_PATTERN.match(d.name)
    if m is None:
        raise ValueError(f"Directory name '{d.name}' does not match the expected run pattern")
    tasks, nodes, gpus_per_node, bx, by, bz, array_index = (int(g) for g in m.groups())
    return TimingData(
        tasks,
        nodes,
        gpus_per_node,
        (bx, by, bz),
        array_index,
        get_timings(d / "timing", action),
    )


@click.group()
def timing():
    """Collect timing data into a SQLite database and print scaling tables."""


@timing.command()
@click.argument(
    "directories",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    nargs=-1,
)
@click.option("--db", default="timings.db", help="Path of sqlite database file")
@click.option(
    "--action",
    default="Sweep:DynamicECM",
    help="Name of the timed action to extract (default: Sweep:DynamicECM)",
)
def make_timing_db(directories, db, action):
    """
    Collect NAStJA timing data from all passed directories and save them into a SQLite database.

    Drops the timings table from the given database and creates a new timings table.
    The existing table is only replaced after all directories were parsed successfully.
    """
    print("Collecting timing info...")
    rows = []
    # Parse everything before touching the database so that a malformed
    # directory cannot leave the database with a dropped or half-filled table.
    for d in directories:
        print(d)
        try:
            t = get_outdir_timing_data(d, action)
        except ValueError as err:
            raise click.ClickException(str(err)) from err
        if not t.timings_by_task:
            # Averaging an empty list would divide by zero.
            raise click.ClickException(f"No timing files found in {d / 'timing'}")
        average = sum(t.timings_by_task) / len(t.timings_by_task)
        rows.append((t.tasks, *t.blockcount, t.array_index, average))

    with closing(sqlite3.connect(db)) as connection:
        cursor = connection.cursor()
        cursor.execute("drop table if exists timings")
        cursor.execute(
            "create table timings "
            "(tasks, blockcount_x, blockcount_y, blockcount_z, array_index, averagetime)"
        )
        cursor.executemany("insert into timings values (?, ?, ?, ?, ?, ?)", rows)
        print("Done, committing into DB...")
        connection.commit()
    print("Done!")


@timing.command()
@click.option("--db", default="timings.db", help="Path of sqlite database file")
@click.option("--time/--no-time", default=False, help="Print average time of best run instead of speedup")
def strong_dat(db, time):
    """
    Print a tab-separated strong scaling table.

    For every task count the block configuration with the lowest mean time is
    selected. The second column is either that time or the speedup relative
    to the smallest task count.
    """
    with closing(sqlite3.connect(db)) as connection:
        values = connection.execute(
            """
            select tasks, min(avg)
            from (
                select tasks, blockcount_x, blockcount_y, blockcount_z,
                       sum(averagetime) / count(*) as avg
                from timings
                group by tasks, blockcount_x, blockcount_y, blockcount_z
            )
            group by tasks
            order by tasks asc;
            """
        ).fetchall()

    if time:
        print("gpus\ttime")
        for tasks, best_time in values:
            print(f"{tasks}\t{best_time}")
    else:
        print("gpus\tspeedup")
        # Rows are ordered by task count, so the first row is the baseline.
        baseline = values[0][1] if values else None
        for tasks, best_time in values:
            print(f"{tasks}\t{baseline / best_time}")


@timing.command()
@click.option("--db", default="timings.db", help="Path of sqlite database file")
def weak_dat(db):
    """
    Print a tab-separated weak scaling table.

    The efficiency of a task count is the mean time of the smallest task
    count divided by the mean time of that task count.
    """
    with closing(sqlite3.connect(db)) as connection:
        values = connection.execute(
            """
            select tasks, avg(averagetime) as mean
            from timings
            group by tasks
            order by tasks asc
            """
        ).fetchall()

    print("gpus\tefficiency")
    # Rows are ordered by task count, so the first row is the baseline.
    baseline = values[0][1] if values else None
    for tasks, mean in values:
        print(f"{tasks}\t{baseline / mean}")


if __name__ == "__main__":
    timing()