# ma/experiments/scripts/timing.py
import csv
import re
import sqlite3
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

import click
@dataclass
class TimingData:
    """Timing results parsed from one NAStJA run output directory."""

    tasks: int
    nodes: int
    gpus_per_node: int
    # Block decomposition (x, y, z) parsed from the directory name.
    blockcount: Tuple[int, int, int]
    array_index: int
    # Per-task runtimes of the measured action. Given in seconds.
    # (Original annotation was `[float]`, a list literal — not a valid type.)
    timings_by_task: List[float]
# Run output directory name, e.g. "...t4n1g4x2y2z1-0": groups are
# tasks, nodes, gpus per node, block counts x/y/z, and the array index.
RUN_PATTERN = re.compile(r".*t([0-9]+)n([0-9]+)g([0-9]+)x([0-9]+)y([0-9]+)z([0-9]+)-([0-9]+)")
# Per-task timing file name, e.g. "timing-12.dat"; group 1 is the task rank.
TIMING_PATTERN = re.compile(r"timing-([0-9]+)\.dat")
# Space-separated timing table row; group 1 is the action name and
# group 5 is the value column that get_timing reads.
TIMING_ROW_PATTERN = re.compile(r"([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+).*")
def get_timing(timing_path: Path, action: str) -> float:
    """Return the fifth-column value of the first row in *timing_path*
    whose first column equals *action*.

    :param timing_path: path to a "timing-N.dat" table file
    :param action: action name to look up (first column of the table)
    :raises ValueError: if no row for *action* is found
    """
    with timing_path.open(encoding="utf8") as timing_file:
        for line in timing_file:
            m = TIMING_ROW_PATTERN.match(line)
            if not m:
                continue
            if m.group(1) == action:
                return float(m.group(5))
    # The original `raise f"..."` raised a plain str, which itself fails
    # with "exceptions must derive from BaseException" — raise properly.
    raise ValueError(f"Could not find action '{action}' in {timing_path}")
def get_timings(d: Path, action: str) -> List[float]:
    """Collect the *action* timing from every "timing-N.dat" file in *d*,
    ordered by task rank N.

    Values are divided by 1e6 — presumably the files store microseconds
    and the result is seconds (matches TimingData's comment); TODO confirm.
    """
    timings = []
    for timing_path in d.iterdir():
        m = TIMING_PATTERN.match(timing_path.name)
        if not m:
            # Skip unrelated files instead of crashing on a failed match
            # (the original called .group() on None for any stray file).
            continue
        rank = int(m.group(1))
        timings.append((rank, get_timing(timing_path, action) / 1_000_000))
    return [timing for rank, timing in sorted(timings, key=lambda t: t[0])]
def get_outdir_timing_data(d: Path, action: str) -> TimingData:
    """Parse the run parameters encoded in directory name *d* and gather
    the per-task timings for *action* from its "timing" subdirectory.

    :raises ValueError: if the directory name does not match RUN_PATTERN
    """
    match_results = RUN_PATTERN.match(d.name)
    if match_results is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError(f"Directory name '{d.name}' does not match the expected run pattern")
    tasks = int(match_results.group(1))
    nodes = int(match_results.group(2))
    gpus_per_node = int(match_results.group(3))
    blockcount = (
        int(match_results.group(4)),
        int(match_results.group(5)),
        int(match_results.group(6))
    )
    array_index = int(match_results.group(7))
    timings_by_task = get_timings(d / "timing", action)
    return TimingData(
        tasks,
        nodes,
        gpus_per_node,
        blockcount,
        array_index,
        timings_by_task
    )
@click.group()
def timing():
    # Click command group: container for the subcommands registered
    # below via @timing.command(). No behavior of its own.
    pass
@timing.command()
@click.argument(
    "directories",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    nargs=-1
)
@click.option("--db", default="timings.db", help="Path of sqlite database file")
def make_timing_db(directories, db):
    """
    Collect NAStJA timing data from all passed directories and save them into a SQLite database.

    Drops the timings table from the given database and creates a new timings table.
    """
    # The original left the connection open and a garbled "@param db asjdas"
    # docstring line; close the connection even if a directory fails to parse.
    connection = sqlite3.connect(db)
    try:
        c = connection.cursor()
        c.execute("drop table if exists timings")
        c.execute("create table timings (tasks, blockcount_x, blockcount_y, blockcount_z, array_index, averagetime)")
        print("Collecting timing info...")
        for d in directories:
            print(d)
            t = get_outdir_timing_data(d, "Sweep:DynamicECM")
            # One row per run, storing the mean per-task runtime.
            average = sum(t.timings_by_task) / len(t.timings_by_task)
            c.execute(
                "insert into timings values (?, ?, ?, ?, ?, ?)",
                (t.tasks, t.blockcount[0], t.blockcount[1], t.blockcount[2], t.array_index, average)
            )
        print("Done, committing into DB...")
        connection.commit()
        print("Done!")
    finally:
        connection.close()
@timing.command()
@click.option("--db", default="timings.db", help="Path of sqlite database file")
@click.option("--time/--no-time", default=False, help="Print average time of best run instead of speedup")
def strong_dat(db, time):
    """
    Print a strong-scaling table: gpus vs. speedup relative to the smallest
    task count, or gpus vs. average time of the best run with --time.
    """
    connection = sqlite3.connect(db)
    try:
        c = connection.cursor()
        # Best (minimal) mean runtime per task count over all
        # block-count configurations.
        res = c.execute("""
            select tasks, min(avg)
            from (
                select tasks, blockcount_x, blockcount_y, blockcount_z, sum(averagetime) / count(*) as avg
                from timings group by tasks, blockcount_x, blockcount_y, blockcount_z
            ) group by tasks order by tasks asc;
        """)
        values = res.fetchall()
    finally:
        # The original never closed the connection.
        connection.close()
    if not time:
        # Baseline is the first (smallest) task count. The original loop
        # variable shadowed the `time` option parameter; renamed for clarity.
        baseline = values[0][1]
        print("gpus\tspeedup")
        for tasks, avg_time in values:
            print(f"{tasks}\t{baseline / avg_time}")
    else:
        print("gpus\ttime")
        for tasks, avg_time in values:
            print(f"{tasks}\t{avg_time}")
if __name__ == "__main__":
    # Dispatch to the click command group when run as a script.
    timing()