From 207038a521ec7b414551b15327dc07291d02af59 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 02:05:19 -0800 Subject: [PATCH 01/18] Add editor configuration files --- .editorconfig | 12 ++++++++++++ .gitignore | 5 +++++ 2 files changed, 17 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..f72537c --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +# This file contains settings which ensure consistent formatting for files +# including indentation and newlines across various editors + +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a5ed5b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.egg-info +**/__pycache__/ +*.log +**/*.lic +.vscode From ec772df5a4b28123ca7d5f99a2f6b74cd6fbe861 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 02:10:39 -0800 Subject: [PATCH 02/18] Add plotting utility, modify CLI command, and clean up output --- poet/__main__.py | 43 ------- poet/poet_solver.py | 28 ++-- poet/poet_solver_gurobi.py | 15 +-- poet/solve.py | 136 +++++++++++++------- poet/util.py | 59 ++++++++- poet/utils/checkmate/core/enum_strategy.py | 82 ++++++++++++ poet/utils/checkmate/plot/graph_plotting.py | 85 ++++++++++++ 7 files changed, 339 insertions(+), 109 deletions(-) delete mode 100644 poet/__main__.py create mode 100644 poet/utils/checkmate/core/enum_strategy.py create mode 100644 poet/utils/checkmate/plot/graph_plotting.py diff --git a/poet/__main__.py b/poet/__main__.py deleted file mode 100644 index 0c13160..0000000 --- a/poet/__main__.py +++ /dev/null @@ -1,43 +0,0 @@ -from argparse import ArgumentParser -from poet import solve - -if __name__ == "__main__": - parser = ArgumentParser(description="Solve a POET LP problem") - 
parser.add_argument( - "--model", - type=str, - required=True, - choices=["vgg16", "vgg16_cifar", "resnet18", "resnet50", "resnet18_cifar", "bert", "transformer", "linear"], - ) - parser.add_argument("--platform", type=str, required=True, choices=["m0", "a72", "a72nocache", "m4", "jetsontx2"]) - parser.add_argument("--ram-budget", type=int, required=True) - parser.add_argument("--runtime-budget", type=float, required=True) - parser.add_argument("--batch-size", type=int, default=1) - parser.add_argument("--mem-power-scale", type=float, default=1.0) - parser.add_argument("--paging", action="store_true", default=True) - parser.add_argument("--remat", action="store_true", default=True) - parser.add_argument("--time-limit-s", type=int, default=1e100) - parser.add_argument("--solve-threads", type=int, default=4) - parser.add_argument("--solver", type=str, default="gurobi", choices=["gurobi", "cbc"]) - parser.add_argument("--use-actual-gurobi", action="store_true", default=False) - args = parser.parse_args() - - result = solve( - model=args.model, - platform=args.platform, - ram_budget=args.ram_budget, - runtime_budget=args.runtime_budget, - batch_size=args.batch_size, - mem_power_scale=args.mem_power_scale, - paging=args.paging, - remat=args.remat, - time_limit_s=args.time_limit_s, - solve_threads=args.solve_threads, - solver=args.solver, - use_actual_gurobi=args.use_actual_gurobi, - ) - - print("POET successfully found a solution!") - print("==> R (recomputation matrix):", result["solution"].R) - print("==> Min (Page-in matrix):", result["solution"].Min) - print("==> Mout (Page-out matrix)", result["solution"].Mout) diff --git a/poet/poet_solver.py b/poet/poet_solver.py index 0e412a0..2bce687 100644 --- a/poet/poet_solver.py +++ b/poet/poet_solver.py @@ -12,12 +12,13 @@ @dataclass class POETSolution: R: np.ndarray - Sram: np.ndarray - Ssd: np.ndarray + SRam: np.ndarray + SSd: np.ndarray Min: np.ndarray Mout: np.ndarray FreeE: np.ndarray U: np.ndarray + optimal: bool 
feasible: bool solve_time_s: Optional[float] = float("inf") @@ -163,24 +164,35 @@ def _disable_remat(self): for i in range(self.T): self.m += self.R[t][i] == True if t == i else False + def is_feasible(self): + return self.m.status not in [ + pl.LpStatusInfeasible, + pl.LpStatusNotSolved, + pl.LpStatusUndefined, + pl.LpStatusUnbounded, + ] + def get_result(self, var_matrix, dtype=int): - if self.m.status != pl.LpStatusOptimal: + if not self.is_feasible(): return None - return [[dtype(pl.value(var_matrix[i][j])) for j in range(len(var_matrix[0]))] for i in range(len(var_matrix))] + return [ + [dtype(pl.value(var_matrix[i][j])) for j in range(len(var_matrix[0]))] + for i in range(len(var_matrix)) + ] def solve(self): with Timer("solve_timer") as t: self.m.solve(self.solver) - is_feasible = self.m.status == pl.LpStatusOptimal return POETSolution( R=self.get_result(self.R), - Sram=self.get_result(self.SRam), - Ssd=self.get_result(self.SSd), + SRam=self.get_result(self.SRam), + SSd=self.get_result(self.SSd), Min=self.get_result(self.MIn), Mout=self.get_result(self.MOut), FreeE=self.get_result(self.Free_E), U=self.get_result(self.U, dtype=float), - feasible=is_feasible, + optimal=self.m.status == pl.LpStatusOptimal, + feasible=self.is_feasible(), solve_time_s=t.elapsed, ) diff --git a/poet/poet_solver_gurobi.py b/poet/poet_solver_gurobi.py index 816b6a8..d3c1f44 100644 --- a/poet/poet_solver_gurobi.py +++ b/poet/poet_solver_gurobi.py @@ -6,10 +6,10 @@ from gurobipy import GRB, Model, quicksum from loguru import logger +from poet.poet_solver import POETSolution from poet.utils.checkmate.core.dfgraph import DFGraph from poet.utils.checkmate.core.utils.definitions import PathLike from poet.utils.checkmate.core.utils.timer import Timer -from poet.poet_solver import POETSolution # noinspection PyPackageRequirements @@ -233,21 +233,16 @@ def solve( self.m.optimize() solve_time = t.elapsed - if self.m.status == GRB.INFEASIBLE: - is_feasible = False - elif self.m.solCount < 1: - 
is_feasible = False - logger.error(f"Model status is {self.m.status} (not infeasible), but solCount is {self.m.solCount}") - else: - is_feasible = True + is_feasible = self.m.status != GRB.INFEASIBLE and self.m.solCount >= 1 return POETSolution( R=self.get_result(self.R, (self.T, self.T)), - Sram=self.get_result(self.SRam, (self.T, self.T)), - Ssd=self.get_result(self.SSd, (self.T, self.T)), + SRam=self.get_result(self.SRam, (self.T, self.T)), + SSd=self.get_result(self.SSd, (self.T, self.T)), Min=self.get_result(self.MIn, (self.T, self.T)), Mout=self.get_result(self.MOut, (self.T, self.T)), FreeE=self.get_result(self.Free_E, (self.T, len(self.g.edge_list))), U=self.get_result(self.U, (self.T, self.T), dtype=np.float), + optimal=self.m.status == GRB.OPTIMAL, feasible=is_feasible, solve_time_s=solve_time, ) diff --git a/poet/solve.py b/poet/solve.py index d9c9430..8adbb13 100644 --- a/poet/solve.py +++ b/poet/solve.py @@ -1,14 +1,13 @@ -from typing import Dict, Literal, Optional +from argparse import ArgumentParser +from typing import Literal, Optional + +import matplotlib.pyplot as plt import numpy as np -from poet.architectures.bert import BERTBase -from poet.architectures.linear import make_linear_network -from poet.architectures.resnet import resnet18, resnet18_cifar, resnet50 -from poet.architectures.vgg import vgg16 -from poet.chipsets import M4F, MKR1000, JetsonTX2, RPi, RPiNoCache -from poet.poet_solver import POETSolver +from poet import solve +from poet.poet_solver import POETSolution, POETSolver from poet.poet_solver_gurobi import POETSolverGurobi -from poet.util import make_dfgraph_costs +from poet.util import get_chipset_and_net, make_dfgraph_costs, plot_dfgraph def solve( @@ -33,9 +32,11 @@ def solve( use_actual_gurobi: Optional[bool] = True, # solver defines the model using PuLP and can swap in either cbc or Gurobi solver solver: Optional[Literal["gurobi", "cbc"]] = None, + print_power_costs: bool = False, + plot_directory: Optional[str] = None, 
time_limit_s: float = 1e100, solve_threads: Optional[int] = None, -) -> Dict: +): """Solve a POET LP problem. :param model: The model to solve for. :param platform: The platform to solve for. @@ -50,40 +51,12 @@ def solve( :param time_limit_s: The time limit for solving in seconds. :param solve_threads: The number of threads to use for solving. """ - if platform == "m0": - chipset = MKR1000 - elif platform == "a72": - chipset = RPi - elif platform == "a72nocache": - chipset = RPiNoCache - elif platform == "m4": - chipset = M4F - elif platform == "jetsontx2": - chipset = JetsonTX2 - else: - raise NotImplementedError() - - chipset["MEMORY_POWER"] *= mem_power_scale - - # make model - if model == "linear": - net = make_linear_network() - elif model == "vgg16": - net = vgg16(batch_size) - elif model == "vgg16_cifar": - net = vgg16(batch_size, 10, (3, 32, 32)) - elif model == "resnet18": - net = resnet18(batch_size) - elif model == "resnet50": - net = resnet50(batch_size) - elif model == "resnet18_cifar": - net = resnet18_cifar(batch_size, 10, (3, 32, 32)) - elif model == "bert": - net = BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=12) - elif model == "transformer": - net = BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=1) - else: - raise NotImplementedError() + chipset, net = get_chipset_and_net( + platform=platform, + model=model, + batch_size=batch_size, + mem_power_scale=mem_power_scale, + ) # build graph graph_costs = make_dfgraph_costs(net=net, device=chipset) @@ -94,9 +67,13 @@ def solve( pageout_power_cost_vec_joule, ) = graph_costs - print("CPU power cost:", cpu_power_cost_vec_joule) - print("Page-in power cost:", pagein_power_cost_vec_joule) - print("Page-out power cost:", pageout_power_cost_vec_joule) + if plot_directory is not None: + plot_dfgraph(g, plot_directory) + + if print_power_costs: + print("CPU power cost:", cpu_power_cost_vec_joule) + print("Page-in power cost:", 
pagein_power_cost_vec_joule) + print("Page-out power cost:", pageout_power_cost_vec_joule) total_runtime = sum(g.cost_cpu.values()) @@ -156,7 +133,72 @@ def solve( total_power_cost_page=total_power_cost_page, total_power_cost_cpu=total_power_cost_cpu, total_runtime=total_runtime, - feasible=(solution is not None and solution.feasible), + feasible=solution.feasible, ) return result + + +if __name__ == "__main__": + parser = ArgumentParser(description="Solve a POET LP problem") + parser.add_argument( + "--model", + type=str, + required=True, + choices=["vgg16", "vgg16_cifar", "resnet18", "resnet50", "resnet18_cifar", "bert", "transformer", "linear"], + ) + parser.add_argument("--platform", type=str, required=True, choices=["m0", "a72", "a72nocache", "m4", "jetsontx2"]) + parser.add_argument("--ram-budget", type=int, required=True) + parser.add_argument("--runtime-budget", type=float, required=True) + parser.add_argument("--batch-size", type=int, default=1) + parser.add_argument("--mem-power-scale", type=float, default=1.0) + parser.add_argument("--paging", action="store_true", default=True) + parser.add_argument("--remat", action="store_true", default=True) + parser.add_argument("--time-limit-s", type=int, default=1e100) + parser.add_argument("--solve-threads", type=int, default=4) + parser.add_argument("--solver", type=str, default="gurobi", choices=["gurobi", "cbc"]) + parser.add_argument("--use-actual-gurobi", action="store_true", default=False) + parser.add_argument("--print-power-costs", action="store_true", default=False) + parser.add_argument("--plot-directory", type=str, default=None) + args = parser.parse_args() + + result = solve( + model=args.model, + platform=args.platform, + ram_budget=args.ram_budget, + runtime_budget=args.runtime_budget, + batch_size=args.batch_size, + mem_power_scale=args.mem_power_scale, + paging=args.paging, + remat=args.remat, + time_limit_s=args.time_limit_s, + solve_threads=args.solve_threads, + solver=args.solver, + 
use_actual_gurobi=args.use_actual_gurobi, + print_power_costs=args.print_power_costs, + plot_directory=args.plot_directory, + ) + + solution: POETSolution = result["solution"] + if solution.feasible: + optimal = solution.optimal + solution_msg = "successfully found an optimal solution" if solution.optimal else "found a feasible solution" + print( + f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']} J of CPU power and {result['total_power_cost_page']} J of memory paging power" + ) + if not solution.optimal: + print("This solution is not guaranteed to be optimal - you can try increasing the time limit to find an optimal solution") + + plt.matshow(solution.R) + plt.title("R") + plt.show() + + plt.matshow(solution.SRam) + plt.title("SRam") + plt.show() + + plt.matshow(solution.SSd) + plt.title("SSd") + plt.show() + else: + print("POET failed to find a feasible solution within the provided time limit") diff --git a/poet/util.py b/poet/util.py index 1d92808..df8b2ec 100644 --- a/poet/util.py +++ b/poet/util.py @@ -5,10 +5,16 @@ import numpy as np import pandas as pd +from poet.architectures.bert import BERTBase +from poet.architectures.linear import make_linear_network +from poet.architectures.resnet import resnet18, resnet18_cifar, resnet50 +from poet.architectures.vgg import vgg16 +from poet.chipsets import M4F, MKR1000, JetsonTX2, RPi, RPiNoCache +from poet.power_computation import DNNLayer, GradientLayer, get_net_costs from poet.utils.checkmate.core.dfgraph import DFGraph from poet.utils.checkmate.core.graph_builder import GraphBuilder from poet.utils.checkmate.core.utils.definitions import PathLike -from poet.power_computation import DNNLayer, get_net_costs, GradientLayer +from poet.utils.checkmate.plot.graph_plotting import plot_dfgraph def save_network_repr(net: List[DNNLayer], readable_path: PathLike = None, pickle_path: PathLike = None): @@ -55,3 +61,54 @@ def 
extract_costs_from_dfgraph(g: DFGraph, sd_card_multipler=5.0): page_in_cost_vec = cpu_cost_vec * sd_card_multipler page_out_cost_vec = cpu_cost_vec * sd_card_multipler return cpu_cost_vec, page_in_cost_vec, page_out_cost_vec + +def get_chipset_and_net(platform: str, model: str, batch_size: int, mem_power_scale: float = 1.0): + if platform == "m0": + chipset = MKR1000 + elif platform == "a72": + chipset = RPi + elif platform == "a72nocache": + chipset = RPiNoCache + elif platform == "m4": + chipset = M4F + elif platform == "jetsontx2": + chipset = JetsonTX2 + else: + raise NotImplementedError() + + chipset["MEMORY_POWER"] *= mem_power_scale + + if model == "linear": + net = make_linear_network() + elif model == "vgg16": + net = vgg16(batch_size) + elif model == "vgg16_cifar": + net = vgg16(batch_size, 10, (3, 32, 32)) + elif model == "resnet18": + net = resnet18(batch_size) + elif model == "resnet50": + net = resnet50(batch_size) + elif model == "resnet18_cifar": + net = resnet18_cifar(batch_size, 10, (3, 32, 32)) + elif model == "bert": + net = BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=12) + elif model == "transformer": + net = BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=1) + else: + raise NotImplementedError() + + return chipset, net + +def plot_network( + platform: str, + model: str, + directory: str, + batch_size: int = 1, + mem_power_scale: float = 1.0, + format="pdf", + quiet=True, + name="" +): + chipset, net = get_chipset_and_net(platform, model, batch_size, mem_power_scale) + g, *_ = make_dfgraph_costs(net, chipset) + plot_dfgraph(g, directory, format, quiet, name) diff --git a/poet/utils/checkmate/core/enum_strategy.py b/poet/utils/checkmate/core/enum_strategy.py new file mode 100644 index 0000000..3491595 --- /dev/null +++ b/poet/utils/checkmate/core/enum_strategy.py @@ -0,0 +1,82 @@ +from enum import Enum + + +class SolveStrategy(Enum): + NOT_SPECIFIED = "NOT_SPECIFIED" + CHEN_SQRTN = 
"CHEN_SQRTN" + CHEN_GREEDY = "CHEN_GREEDY" + CHEN_SQRTN_NOAP = "CHEN_SQRTN_NOAP" + CHEN_GREEDY_NOAP = "CHEN_GREEDY_NOAP" + OPTIMAL_ILP_GC = "OPTIMAL_ILP_GC" + CHECKPOINT_LAST_NODE = "CHECKPOINT_LAST_NODE" + CHECKPOINT_ALL = "CHECKPOINT_ALL" + CHECKPOINT_ALL_AP = "CHECKPOINT_ALL_AP" + GRIEWANK_LOGN = "GRIEWANK_LOGN" + APPROX_DET_ROUND_LP_SWEEP = "APPROX_DET_ROUND_LP_SWEEP" + APPROX_DET_ROUND_LP_05_THRESH = "APPROX_DET_ROUND_LP_05_THRESH" + APPROX_DET_RANDOM_THRESH_ROUND_LP = "APPROX_DET_RANDOM_THRESH_ROUND_LP" + APPROX_RANDOMIZED_ROUND = "APPROX_RANDOMIZED_ROUND" + LB_LP = "LB_LP" + SIMRD = 'SIMRD' + SIMRD_MSPS = 'SIMRD_MSPS' + + @classmethod + def get_description(cls, val, model_name=None): + is_linear = model_name in ("VGG16", "VGG19", "MobileNet") + return { + cls.CHEN_SQRTN: "AP $\\sqrt{n}$", + cls.CHEN_GREEDY: "AP greedy", + cls.CHEN_SQRTN_NOAP: "Generalized $\\sqrt{n}$" if not is_linear else "Chen et al. $\\sqrt{n}$", + cls.CHEN_GREEDY_NOAP: "Generalized greedy", + cls.OPTIMAL_ILP_GC: "Optimal MILP (proposed)", + cls.CHECKPOINT_LAST_NODE: "Checkpoint last node", + cls.CHECKPOINT_ALL: "Checkpoint all (ideal)", + cls.CHECKPOINT_ALL_AP: "Checkpoint all APs", + cls.GRIEWANK_LOGN: "Griewank et al. 
$\\log~n$" if is_linear else "AP $\\log~n$", + cls.APPROX_DET_ROUND_LP_SWEEP: "Approximation via deterministic rounding of LP relaxation w/ threshold sweep", + cls.APPROX_DET_RANDOM_THRESH_ROUND_LP: "Approximation via deterministic rounding of LP relaxation with random thresholds", + cls.APPROX_DET_ROUND_LP_05_THRESH: "Approximation via deterministic rounding of LP relaxation w/ 0.5 threshold", + cls.APPROX_RANDOMIZED_ROUND: "Approximation via randomized rounding of LP relaxation", + cls.LB_LP: "Lower bound via LP relaxation", + cls.SIMRD: "Dynamic Tensor Rematerialization", + cls.SIMRD_MSPS: "Capuchin MSPS heuristic from DTR", + }[val] + + # todo move this to experiments codebase + @classmethod + def get_plot_params(cls, val): + from matplotlib import rcParams + + fullsize = rcParams["lines.markersize"] + halfsize = fullsize / 2 + bigger = fullsize * 1.5 + mapping = { + cls.CHEN_SQRTN: ("c", "D", halfsize), + cls.CHEN_SQRTN_NOAP: ("c", "^", halfsize), + cls.CHEN_GREEDY: ("g", ".", fullsize), + cls.CHEN_GREEDY_NOAP: ("g", "+", fullsize), + cls.CHECKPOINT_ALL: ("k", "*", bigger), + cls.CHECKPOINT_ALL_AP: ("b", "x", fullsize), + cls.GRIEWANK_LOGN: ("m", "p", fullsize), + cls.OPTIMAL_ILP_GC: ("r", "s", halfsize), + cls.APPROX_DET_ROUND_LP_SWEEP: ("r", "*", fullsize), + cls.APPROX_DET_ROUND_LP_05_THRESH: ("r", "^", halfsize), + cls.APPROX_DET_RANDOM_THRESH_ROUND_LP: ("r", "x", fullsize), + cls.APPROX_RANDOMIZED_ROUND: ("r", "+", fullsize), + cls.LB_LP: ("r", "p", fullsize), + cls.SIMRD: ("r", ".", fullsize), + cls.SIMRD_MSPS: ("m", ".", fullsize), + + } + if val in mapping: + return mapping[val] + raise NotImplementedError("No plotting parameters for strategy {}".format(val)) + + +class ImposedSchedule(Enum): + COVER_LAST_NODE = "COVER_LAST_NODE" + COVER_ALL_NODES = "COVER_ALL_NODES" + FULL_SCHEDULE = "FULL_SCHEDULE" + + def __str__(self): + return self.value diff --git a/poet/utils/checkmate/plot/graph_plotting.py b/poet/utils/checkmate/plot/graph_plotting.py new file 
mode 100644 index 0000000..4ce9547 --- /dev/null +++ b/poet/utils/checkmate/plot/graph_plotting.py @@ -0,0 +1,85 @@ +import pathlib +from typing import Optional + +import numpy as np +from graphviz import Digraph + +from poet.utils.checkmate.core.dfgraph import DFGraph +from poet.utils.checkmate.core.schedule import ScheduledResult +from poet.utils.checkmate.core.utils.definitions import PathLike + + +def plot_dfgraph(g: DFGraph, directory, format="pdf", quiet=True, name=""): + """Generate Graphviz-formatted edge list for visualization, and write pdf""" + print("Plotting network architecture...") + dot = Digraph("render_dfgraph" + str(name)) + dot.attr("graph") + for u in g.v: + node_name = g.node_names.get(u) + node_name = node_name if node_name is None else "{} ({})".format(node_name, str(u)) + attrs = {"style": "filled"} if g.is_backward_node(u) else {} + dot.node(str(u), node_name, **attrs) + for edge in g.edge_list: + dep_order = str(g.args[edge[-1]].index(edge[0])) + dot.edge(*map(str, edge), label=dep_order) + try: + dot.render(directory=directory, format=format, quiet=quiet) + except TypeError: + dot.render(directory=directory, format=format) + print("Saved network architecture plot to directory:", directory) + + +def plot_schedule( + sched_result: ScheduledResult, plot_mem_usage=False, save_file: Optional[PathLike] = None, show=False, plt=None +): + assert sched_result.feasible + R = sched_result.schedule_aux_data.R + S = sched_result.schedule_aux_data.S + U = None if sched_result.ilp_aux_data is None else sched_result.ilp_aux_data.U + mem_grid = None if sched_result.schedule_aux_data is None else sched_result.schedule_aux_data.mem_grid + _plot_schedule_from_rs(R, S, plot_mem_usage, mem_grid, U, save_file, show, plt) + + +def _plot_schedule_from_rs( + R, S, plot_mem_usage=False, mem_grid=None, U=None, save_file: Optional[PathLike] = None, show=False, plt=None +): + if plt is None: + import matplotlib.pyplot as plt + + if plot_mem_usage: + assert mem_grid 
is not None + fig, axs = plt.subplots(1, 4) + vmax = mem_grid + vmax = vmax if U is None else max(vmax, np.max(U)) + + # Plot slow verifier memory usage + axs[2].invert_yaxis() + axs[2].pcolormesh(mem_grid, cmap="Greys", vmin=0, vmax=vmax) + axs[2].set_title("Memory usage (verifier)") + + # Plot solver memory usage variables + axs[3].invert_yaxis() + axs[3].set_title("Memory usage (solved)") + if U is not None: + axs[3].pcolormesh(U, cmap="Greys", vmin=0, vmax=vmax) + + fig.set_size_inches(28, 6) + else: + fig, axs = plt.subplots(1, 2) + fig.set_size_inches(18, 6) + + axs[0].invert_yaxis() + axs[0].pcolormesh(R, cmap="Greys", vmin=0, vmax=1) + axs[0].set_title("R") + + axs[1].invert_yaxis() + axs[1].pcolormesh(S, cmap="Greys", vmin=0, vmax=1) + axs[1].set_title("S") + + if show: + plt.show() + if save_file: + path = pathlib.Path(save_file) + path.parents[0].mkdir(parents=True, exist_ok=True) + fig.savefig(path) + plt.close(fig) From 7d9c1e7ff07bc5e499e752b0ddb27999fba3798a Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 02:11:26 -0800 Subject: [PATCH 03/18] Update README to reflect new CLI command --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 222c059..65f42b9 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ If you are affiliated with an academic institution, you can acquire a free Gurob Once you have installed POET and optionally configured Gurobi, you can run the solver via the command line. 
Here's an example: ```bash -python -m poet --model resnet18_cifar --platform a72 --ram-budget 3000000 --runtime-budget 7.6 +python poet/solve.py --model resnet18_cifar --platform a72 --ram-budget 3000000 --runtime-budget 7.6 ``` ### Using the Solver API Directly From 8f4e51e339b10bde0df29e0c0f32818345950354 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 02:12:22 -0800 Subject: [PATCH 04/18] Update dependencies for plotting --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 478fd5e..db39745 100644 --- a/setup.py +++ b/setup.py @@ -8,9 +8,11 @@ python_requires=">=3.8", install_requires=[ "numpy", + "matplotlib", "pandas", "loguru", "gurobipy", + "graphviz", "toposort", "pulp", ], From 39222e173ba470a006c59f2dc0c1934229a49e46 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 02:12:31 -0800 Subject: [PATCH 05/18] Standardize output from network architecture files --- poet/architectures/bert.py | 10 +++++----- poet/architectures/linear.py | 8 ++++---- poet/architectures/resnet.py | 34 +++++++++++++--------------------- poet/architectures/vgg.py | 5 +++-- 4 files changed, 25 insertions(+), 32 deletions(-) diff --git a/poet/architectures/bert.py b/poet/architectures/bert.py index d969f0a..830c9c9 100644 --- a/poet/architectures/bert.py +++ b/poet/architectures/bert.py @@ -64,10 +64,10 @@ def BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS totalEnergy += layer.energy(CHIPSET) print(layer) - print("### Total no of Paramters in network:", param_count) - print("### Total energy {fwd+backward} cost is:", totalEnergy) + print("### Total number of parameters in network:", param_count) + print("### Total energy (forward + backward) cost is:", totalEnergy) - print("### Profiles ###") # Print it out only if required, else looks illegible - resource = get_net_costs(net, MKR1000) + # print("### Profiles ###") + # resource = get_net_costs(net, MKR1000) # for _list in resource: - 
# print(_list, ":", resource[_list]) + # print(_list, ":", resource[_list]) diff --git a/poet/architectures/linear.py b/poet/architectures/linear.py index 5b520b4..9412293 100644 --- a/poet/architectures/linear.py +++ b/poet/architectures/linear.py @@ -36,7 +36,7 @@ def make_unit_linear_network(nfwd=12): for layer in net: print(layer) - print("### Profiles ###") - resource = get_net_costs(net, CHIPSET) - for _list in resource: - print(_list, resource[_list]) + # print("### Profiles ###") + # resource = get_net_costs(net, CHIPSET) + # for _list in resource: + # print(_list, resource[_list]) diff --git a/poet/architectures/resnet.py b/poet/architectures/resnet.py index 63902e5..ac837f2 100644 --- a/poet/architectures/resnet.py +++ b/poet/architectures/resnet.py @@ -1,17 +1,8 @@ -from poet.power_computation import ( - AvgPool2d, - BatchNorm2d, - Conv2dLayer, - CrossEntropyLoss, - FlattenLayer, - GradientLayer, - InputLayer, - LinearLayer, - MaxPool2d, - ReLULayer, - SkipAddLayer, - get_net_costs, -) +from poet.power_computation import (AvgPool2d, BatchNorm2d, Conv2dLayer, + CrossEntropyLoss, FlattenLayer, + GradientLayer, InputLayer, LinearLayer, + MaxPool2d, ReLULayer, SkipAddLayer, + get_net_costs) # Resnet implemented from the paper, not from PyTorch. 
@@ -161,7 +152,7 @@ def make_basic_block(in_planes, planes, stride, padding, x): from poet.chipsets import * CHIPSET = MKR1000 - print("### network ###") + print("### Network ###") net = resnet18_cifar(1) param_count = 0 @@ -170,10 +161,11 @@ def make_basic_block(in_planes, planes, stride, padding, x): param_count += layer.param_count totalEnergy += layer.energy(M4F) print(layer) - print("### Total no of Paramters in network:", param_count) - print("### Total energy {fwd+backward} cost is:", totalEnergy) - print("### Profiles ###") - resource = get_net_costs(net, CHIPSET) - for _list in resource: - print(_list, resource[_list]) + print("### Total number of parameters in network:", param_count) + print("### Total energy (forward + backward) cost is:", totalEnergy) + + # print("### Profiles ###") + # resource = get_net_costs(net, CHIPSET) + # for _list in resource: + # print(_list, resource[_list]) diff --git a/poet/architectures/vgg.py b/poet/architectures/vgg.py index b3a2b47..5ac180f 100644 --- a/poet/architectures/vgg.py +++ b/poet/architectures/vgg.py @@ -69,8 +69,9 @@ def make_conv_stack(in_channels, out_filters, kernel_size, x): totalEnergy += layer.energy(CHIPSET) totalRAM += layer.param_ram_usage(CHIPSET) print(layer) - print("### Total no of Paramters in network:", param_count) - print("### Total energy {fwd+backward} cost is:", totalEnergy) + + print("### Total number of parameters in network:", param_count) + print("### Total energy (forward + backward) cost is:", totalEnergy) # print("### Profiles ###") # resource = get_net_costs(net, MKR1000) From 019fe1bc594ff8eba600a48bf5d3a6d955c31a31 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 02:19:40 -0800 Subject: [PATCH 06/18] Add build directories to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index a5ed5b0..49bc2fd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +build +dist *.egg-info **/__pycache__/ *.log From 
c5f7219688ad379710a8b623703d783143d5f1ab Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Fri, 6 Jan 2023 15:00:58 -0800 Subject: [PATCH 07/18] Format files with Black --- poet/architectures/resnet.py | 19 ++++++++++++++----- poet/poet_solver.py | 5 +---- poet/power_computation.py | 5 ++--- poet/power_computation_transformer.py | 6 ++---- poet/util.py | 11 +++-------- poet/utils/checkmate/core/enum_strategy.py | 5 ++--- poet/utils/checkmate/plot/graph_plotting.py | 8 ++------ 7 files changed, 26 insertions(+), 33 deletions(-) diff --git a/poet/architectures/resnet.py b/poet/architectures/resnet.py index ac837f2..0e83061 100644 --- a/poet/architectures/resnet.py +++ b/poet/architectures/resnet.py @@ -1,8 +1,17 @@ -from poet.power_computation import (AvgPool2d, BatchNorm2d, Conv2dLayer, - CrossEntropyLoss, FlattenLayer, - GradientLayer, InputLayer, LinearLayer, - MaxPool2d, ReLULayer, SkipAddLayer, - get_net_costs) +from poet.power_computation import ( + AvgPool2d, + BatchNorm2d, + Conv2dLayer, + CrossEntropyLoss, + FlattenLayer, + GradientLayer, + InputLayer, + LinearLayer, + MaxPool2d, + ReLULayer, + SkipAddLayer, + get_net_costs, +) # Resnet implemented from the paper, not from PyTorch. 
diff --git a/poet/poet_solver.py b/poet/poet_solver.py index 2bce687..11da572 100644 --- a/poet/poet_solver.py +++ b/poet/poet_solver.py @@ -175,10 +175,7 @@ def is_feasible(self): def get_result(self, var_matrix, dtype=int): if not self.is_feasible(): return None - return [ - [dtype(pl.value(var_matrix[i][j])) for j in range(len(var_matrix[0]))] - for i in range(len(var_matrix)) - ] + return [[dtype(pl.value(var_matrix[i][j])) for j in range(len(var_matrix[0]))] for i in range(len(var_matrix))] def solve(self): with Timer("solve_timer") as t: diff --git a/poet/power_computation.py b/poet/power_computation.py index 2dbf53f..6511fec 100644 --- a/poet/power_computation.py +++ b/poet/power_computation.py @@ -243,10 +243,10 @@ def find_outshape(self, inputs): def get_net_costs(net, device): compute_energy_list, compute_runtime_list, ram_list, param_ram_list, pagein_cost, pageout_cost = [[] for _ in range(6)] - ''' + """ If you have access to hardware device, this costs should be obtained from accurate profiles. Else, POET adopts a flop based model. 
- ''' + """ for layer in net: compute_energy_list.append(layer.energy(device)) compute_runtime_list.append(layer.runtime(device)) @@ -263,4 +263,3 @@ def get_net_costs(net, device): pagein_cost_joules=pagein_cost, pageout_cost_joules=pageout_cost, ) - diff --git a/poet/power_computation_transformer.py b/poet/power_computation_transformer.py index 1501c1f..cea084a 100644 --- a/poet/power_computation_transformer.py +++ b/poet/power_computation_transformer.py @@ -2,7 +2,7 @@ import numpy as np -# FLOPS_PER_WATT is FLOP_PER_JOULE +# FLOPS_PER_WATT is FLOP_PER_JOULE from poet.chipsets import MKR1000 from poet.power_computation import DNNLayer @@ -24,9 +24,7 @@ def __init__(self, SEQ_LEN, HIDDEN_DIM, I, ATTN_HEADS, input): class QKTMatrix(DNNLayer): # Fusing Masking and Dropout def __init__(self, SEQ_LEN, HIDDEN_DIM, I, ATTN_HEADS, input): - super().__init__( - out_shape=(SEQ_LEN, I, ATTN_HEADS), depends_on=[input] if input is not None else [], param_count=0 - ) + super().__init__(out_shape=(SEQ_LEN, I, ATTN_HEADS), depends_on=[input] if input is not None else [], param_count=0) self.flop = SEQ_LEN * HIDDEN_DIM * I * ATTN_HEADS + np.prod(self.out_shape) + np.prod(self.out_shape) # QKT + mask + dropout diff --git a/poet/util.py b/poet/util.py index df8b2ec..86a5096 100644 --- a/poet/util.py +++ b/poet/util.py @@ -62,6 +62,7 @@ def extract_costs_from_dfgraph(g: DFGraph, sd_card_multipler=5.0): page_out_cost_vec = cpu_cost_vec * sd_card_multipler return cpu_cost_vec, page_in_cost_vec, page_out_cost_vec + def get_chipset_and_net(platform: str, model: str, batch_size: int, mem_power_scale: float = 1.0): if platform == "m0": chipset = MKR1000 @@ -99,15 +100,9 @@ def get_chipset_and_net(platform: str, model: str, batch_size: int, mem_power_sc return chipset, net + def plot_network( - platform: str, - model: str, - directory: str, - batch_size: int = 1, - mem_power_scale: float = 1.0, - format="pdf", - quiet=True, - name="" + platform: str, model: str, directory: str, 
batch_size: int = 1, mem_power_scale: float = 1.0, format="pdf", quiet=True, name="" ): chipset, net = get_chipset_and_net(platform, model, batch_size, mem_power_scale) g, *_ = make_dfgraph_costs(net, chipset) diff --git a/poet/utils/checkmate/core/enum_strategy.py b/poet/utils/checkmate/core/enum_strategy.py index 3491595..1b80206 100644 --- a/poet/utils/checkmate/core/enum_strategy.py +++ b/poet/utils/checkmate/core/enum_strategy.py @@ -17,8 +17,8 @@ class SolveStrategy(Enum): APPROX_DET_RANDOM_THRESH_ROUND_LP = "APPROX_DET_RANDOM_THRESH_ROUND_LP" APPROX_RANDOMIZED_ROUND = "APPROX_RANDOMIZED_ROUND" LB_LP = "LB_LP" - SIMRD = 'SIMRD' - SIMRD_MSPS = 'SIMRD_MSPS' + SIMRD = "SIMRD" + SIMRD_MSPS = "SIMRD_MSPS" @classmethod def get_description(cls, val, model_name=None): @@ -66,7 +66,6 @@ def get_plot_params(cls, val): cls.LB_LP: ("r", "p", fullsize), cls.SIMRD: ("r", ".", fullsize), cls.SIMRD_MSPS: ("m", ".", fullsize), - } if val in mapping: return mapping[val] diff --git a/poet/utils/checkmate/plot/graph_plotting.py b/poet/utils/checkmate/plot/graph_plotting.py index 4ce9547..1060c8e 100644 --- a/poet/utils/checkmate/plot/graph_plotting.py +++ b/poet/utils/checkmate/plot/graph_plotting.py @@ -29,9 +29,7 @@ def plot_dfgraph(g: DFGraph, directory, format="pdf", quiet=True, name=""): print("Saved network architecture plot to directory:", directory) -def plot_schedule( - sched_result: ScheduledResult, plot_mem_usage=False, save_file: Optional[PathLike] = None, show=False, plt=None -): +def plot_schedule(sched_result: ScheduledResult, plot_mem_usage=False, save_file: Optional[PathLike] = None, show=False, plt=None): assert sched_result.feasible R = sched_result.schedule_aux_data.R S = sched_result.schedule_aux_data.S @@ -40,9 +38,7 @@ def plot_schedule( _plot_schedule_from_rs(R, S, plot_mem_usage, mem_grid, U, save_file, show, plt) -def _plot_schedule_from_rs( - R, S, plot_mem_usage=False, mem_grid=None, U=None, save_file: Optional[PathLike] = None, show=False, plt=None 
-): +def _plot_schedule_from_rs(R, S, plot_mem_usage=False, mem_grid=None, U=None, save_file: Optional[PathLike] = None, show=False, plt=None): if plt is None: import matplotlib.pyplot as plt From 5709cb995d1d666838afde5a74975c4cbd9e564e Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 7 Jan 2023 00:36:16 -0800 Subject: [PATCH 08/18] Schedule plotting, input dependency fix, and result printing --- poet/poet_solver.py | 4 +- poet/poet_solver_gurobi.py | 2 +- poet/solve.py | 29 +---- poet/util.py | 79 +++++++++++++- .../utils/checkmate/core/utils/definitions.py | 6 -- poet/utils/checkmate/core/utils/scheduler.py | 6 +- .../checkmate/core/utils/solver_common.py | 101 ------------------ poet/utils/checkmate/plot/graph_plotting.py | 54 ---------- 8 files changed, 87 insertions(+), 194 deletions(-) delete mode 100644 poet/utils/checkmate/core/utils/solver_common.py diff --git a/poet/poet_solver.py b/poet/poet_solver.py index 11da572..f26b40c 100644 --- a/poet/poet_solver.py +++ b/poet/poet_solver.py @@ -18,7 +18,7 @@ class POETSolution: Mout: np.ndarray FreeE: np.ndarray U: np.ndarray - optimal: bool + finished: bool feasible: bool solve_time_s: Optional[float] = float("inf") @@ -188,7 +188,7 @@ def solve(self): Mout=self.get_result(self.MOut), FreeE=self.get_result(self.Free_E), U=self.get_result(self.U, dtype=float), - optimal=self.m.status == pl.LpStatusOptimal, + finished=self.m.status in [pl.LpStatusOptimal, pl.LpStatusInfeasible], feasible=self.is_feasible(), solve_time_s=t.elapsed, ) diff --git a/poet/poet_solver_gurobi.py b/poet/poet_solver_gurobi.py index d3c1f44..796d4ad 100644 --- a/poet/poet_solver_gurobi.py +++ b/poet/poet_solver_gurobi.py @@ -242,7 +242,7 @@ def solve( Mout=self.get_result(self.MOut, (self.T, self.T)), FreeE=self.get_result(self.Free_E, (self.T, len(self.g.edge_list))), U=self.get_result(self.U, (self.T, self.T), dtype=np.float), - optimal=self.m.status == GRB.OPTIMAL, + finished=self.m.status in [GRB.OPTIMAL, GRB.INFEASIBLE], 
feasible=is_feasible, solve_time_s=solve_time, ) diff --git a/poet/solve.py b/poet/solve.py index 8adbb13..4627d3e 100644 --- a/poet/solve.py +++ b/poet/solve.py @@ -1,13 +1,12 @@ from argparse import ArgumentParser from typing import Literal, Optional -import matplotlib.pyplot as plt import numpy as np from poet import solve -from poet.poet_solver import POETSolution, POETSolver +from poet.poet_solver import POETSolver from poet.poet_solver_gurobi import POETSolverGurobi -from poet.util import get_chipset_and_net, make_dfgraph_costs, plot_dfgraph +from poet.util import get_chipset_and_net, make_dfgraph_costs, plot_dfgraph, print_result def solve( @@ -179,26 +178,4 @@ def solve( plot_directory=args.plot_directory, ) - solution: POETSolution = result["solution"] - if solution.feasible: - optimal = solution.optimal - solution_msg = "successfully found an optimal solution" if solution.optimal else "found a feasible solution" - print( - f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']} J of CPU power and {result['total_power_cost_page']} J of memory paging power" - ) - if not solution.optimal: - print("This solution is not guaranteed to be optimal - you can try increasing the time limit to find an optimal solution") - - plt.matshow(solution.R) - plt.title("R") - plt.show() - - plt.matshow(solution.SRam) - plt.title("SRam") - plt.show() - - plt.matshow(solution.SSd) - plt.title("SSd") - plt.show() - else: - print("POET failed to find a feasible solution within the provided time limit") + print_result(result) diff --git a/poet/util.py b/poet/util.py index 86a5096..69807db 100644 --- a/poet/util.py +++ b/poet/util.py @@ -1,7 +1,8 @@ import pickle from pathlib import Path -from typing import List +from typing import List, Optional +import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -11,9 +12,11 @@ from poet.architectures.vgg import vgg16 from poet.chipsets import M4F, 
MKR1000, JetsonTX2, RPi, RPiNoCache from poet.power_computation import DNNLayer, GradientLayer, get_net_costs +from poet.poet_solver import POETSolution from poet.utils.checkmate.core.dfgraph import DFGraph from poet.utils.checkmate.core.graph_builder import GraphBuilder from poet.utils.checkmate.core.utils.definitions import PathLike +from poet.utils.checkmate.core.utils.scheduler import schedule_from_rs from poet.utils.checkmate.plot.graph_plotting import plot_dfgraph @@ -107,3 +110,77 @@ def plot_network( chipset, net = get_chipset_and_net(platform, model, batch_size, mem_power_scale) g, *_ = make_dfgraph_costs(net, chipset) plot_dfgraph(g, directory, format, quiet, name) + + +def print_result(result: dict): + solution: POETSolution = result["solution"] + if solution.feasible: + solution_msg = "successfully found an optimal solution" if solution.finished else "found a feasible solution" + print( + f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']:.5f} J of CPU power and {result['total_power_cost_page']:.5f} J of memory paging power" + ) + if not solution.finished: + print("This solution is not guaranteed to be optimal - you can try increasing the time limit to find an optimal solution") + + plt.matshow(solution.R) + plt.title("R") + plt.show() + + plt.matshow(solution.SRam) + plt.title("SRam") + plt.show() + + plt.matshow(solution.SSd) + plt.title("SSd") + plt.show() + + plot_schedule(solution.R, solution.SRam, show=True, plot_mem_usage=True) + elif solution.finished: + print("POET finished solving and determined that no feasible solution exists") + else: + print("POET failed to find a feasible solution within the provided time limit") + + +def plot_schedule(R, S, plot_mem_usage=False, mem_grid=None, U=None, save_file: Optional[PathLike] = None, show=False): + x, y = get_chipset_and_net("m0", "linear", 1, 1) + g, *_ = make_dfgraph_costs(y, x) + _, scheduler_aux_data = 
schedule_from_rs(g, np.array(R), np.array(S)) + mem_grid = scheduler_aux_data.mem_grid + + if plot_mem_usage: + # assert mem_grid is not None + fig, axs = plt.subplots(1, 4) + vmax = mem_grid + vmax = vmax if U is None else max(vmax, np.max(U)) + + # Plot slow verifier memory usage + axs[2].invert_yaxis() + axs[2].pcolormesh(mem_grid, cmap="Greys", vmin=0, vmax=vmax[0][0]) + axs[2].set_title("Memory usage (verifier)") + + # Plot solver memory usage variables + axs[3].invert_yaxis() + axs[3].set_title("Memory usage (solved)") + if U is not None: + axs[3].pcolormesh(U, cmap="Greys", vmin=0, vmax=vmax) + + fig.set_size_inches(28, 6) + else: + fig, axs = plt.subplots(1, 2) + fig.set_size_inches(18, 6) + + axs[0].invert_yaxis() + axs[0].pcolormesh(R, cmap="Greys", vmin=0, vmax=1) + axs[0].set_title("R") + + axs[1].invert_yaxis() + axs[1].pcolormesh(S, cmap="Greys", vmin=0, vmax=1) + axs[1].set_title("S") + + if show: + plt.show() + if save_file: + path = Path(save_file) + path.parents[0].mkdir(parents=True, exist_ok=True) + fig.savefig(path) + plt.close(fig) diff --git a/poet/utils/checkmate/core/utils/definitions.py b/poet/utils/checkmate/core/utils/definitions.py index 2a574af..0735d93 100644 --- a/poet/utils/checkmate/core/utils/definitions.py +++ b/poet/utils/checkmate/core/utils/definitions.py @@ -1,4 +1,3 @@ -import os import pathlib from typing import Union, Iterable, Tuple, Dict, List @@ -8,8 +7,3 @@ Vertex = int EdgeList = Iterable[Tuple[Vertex, Vertex]] AdjList = Dict[Vertex, List[Vertex]] - - -# environment variables -ENV_VAR_FLAGS = ["DEBUG_SCHEDULER_RAM"] -active_env_var_flags = {key for key in ENV_VAR_FLAGS if key in os.environ and os.environ[key].lower() in ("true", "t", "1")} diff --git a/poet/utils/checkmate/core/utils/scheduler.py b/poet/utils/checkmate/core/utils/scheduler.py index e303140..cb0242f 100644 --- a/poet/utils/checkmate/core/utils/scheduler.py +++ b/poet/utils/checkmate/core/utils/scheduler.py @@ -6,7 +6,6 @@ from 
poet.utils.checkmate.core.dfgraph import DFGraph from poet.utils.checkmate.core.schedule import OperatorEvaluation, AllocateRegister, DeallocateRegister, Schedule, SchedulerAuxData -from poet.utils.checkmate.core.utils.definitions import active_env_var_flags from poet.utils.checkmate.core.utils.timer import Timer @@ -25,6 +24,7 @@ def __init__(self, g, verbosity: int = 2): self.next_free_register_id = 0 self.verbosity = verbosity self.ram_timeline = [] # type: List[int] + self.allocate_register(0) def is_op_cached(self, op_id: int): return op_id in self.live_registers.keys() @@ -49,14 +49,14 @@ def allocate_register(self, op_id: int): return reg.register_id def run_operator(self, op_id: int, update_aux_vars: bool): - if not all([pred in self.live_registers.keys() for pred in self.g.predecessors(op_id)]): + if not all([pred == 0 or pred in self.live_registers.keys() for pred in self.g.predecessors(op_id)]): raise InfeasibleScheduleError( "Dependency not fulfilled for op #{}, ops in ram now are {} but I need {}".format( op_id, set(self.live_registers.keys()), self.g.predecessors(op_id) ) ) out_reg = self.allocate_register(op_id) - in_regs = {pred_id: self.live_registers[pred_id] for pred_id in self.g.predecessors(op_id)} + in_regs = {pred_id: self.live_registers[pred_id] for pred_id in self.g.predecessors(op_id) if pred_id != 0} eval_op = OperatorEvaluation( op_id, in_regs, diff --git a/poet/utils/checkmate/core/utils/solver_common.py b/poet/utils/checkmate/core/utils/solver_common.py deleted file mode 100644 index e06de32..0000000 --- a/poet/utils/checkmate/core/utils/solver_common.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Set - -import numpy as np - -from poet.utils.checkmate.core.dfgraph import DFGraph -from poet.utils.checkmate.core.utils.definitions import Vertex - -SOLVER_DTYPE = np.int - - -def setup_implied_s_backwards(g: DFGraph, s: np.ndarray = None): - """ - Given a backward graph, this function will set the appropriate items in S to 1 in 
order - to satisfy no-recompute rules during backwards optimization. - """ - s = s if s is not None else np.zeros((g.size, g.size), dtype=SOLVER_DTYPE) - for (start, end) in g.induce_subgraph(g.vbwd): - for t in range(start + 1, end + 1): - s[t, start] = 1 - return s - - -def gen_s_matrix_fixed_checkpoints(g: DFGraph, segment_set: Set[Vertex]): - """ - Given a list of checkpoint locations, this function will generate - as output S matrices denoting checkpoint schedule, given a set of - fixed segments (only recompute once). - """ - T = len(g.vfwd) - Ttotal = g.size - segment_set = list(sorted(segment_set)) - S = np.zeros((g.size, g.size), dtype=SOLVER_DTYPE) - # set minimum input requirements - for v in g.v: - for u in g.predecessors(v): - for t in range(u + 1, v): - S[t, u] = 1 - - # stripe every k nodes - for t in range(1, Ttotal): - for i in segment_set: - if i < t: - S[t, i] = 1 - - # checkpoint ladders - starts = [0] + list(map(lambda x: x, segment_set)) - ends = segment_set + [T + 1] - for start, end in zip(starts, ends): - for t in filter(lambda t: t < Ttotal, map(lambda x: Ttotal - x - 1, range(start, end))): - for i in range(start, min(t, end)): - S[t, i] = 1 - - # forward checkpoint block - for start, end in zip(starts, ends): - for t in filter(lambda t: t < Ttotal, range(start, end + 1)): - for i in range(start, min(t, end)): - S[t, i] = 1 - - # backward checkpoint block - # originally used as baselines will checkpoint whole blocks (e.g. Chen 2016 checkpoints entire backwards blocks), - # but removed in public release as schedules are faster without this. 
- # for start, end in zip(starts, ends): - # for t in filter(lambda _t: _t < Ttotal, range(start, end + 1)): - # back_t = Ttotal - 1 - t - # for i in range(start, end): - # back_i = g.forward_to_backward(i) - # if back_i is not None and back_i < back_t: - # S[back_t, back_i] = 1 - - S = setup_implied_s_backwards(g, S) - return S - - -def solve_r_opt(g: DFGraph, s: np.ndarray): - """Find the optimal recomputation pattern given caching decisions. - Given S, E = [(i, j)] where node j depends on the result of node i, - find R that minimizes cost, satisfies constraints. Assumes recomputation - costs are nonnegative. - - NOTE: Does NOT check if memory limits are exceeded. - Enforcing R[t,i] != S[t,i] does not seem to be necessary. - """ - T = s.shape[0] - assert s.shape[1] == T - - R = np.eye(T, dtype=s.dtype) # Enforce R_t,t = 1 - # Enforce S_{t+1,v} <= S_{t,v} + R_{t,v}, - # i.e. R_{t,v} >= S_{t+1,v} - S_{t,v} - sdiff = s[1:] - s[:-1] - R[:-1] = R[:-1] | (R[:-1] < sdiff) - # Create reverse adjacency list (child -> parents, i.e. 
node -> dependencies) - adj = [[] for _ in range(T)] - for (u, v) in g.edge_list: - adj[v].append(u) - # Enforce R_{t,v} <= R_{t,u} + S_{t,u} for all (u, v) \in E - for t in range(T): - for v in range(t, -1, -1): - for u in adj[v]: - if R[t, v] > R[t, u] + s[t, u]: - R[t, u] = 1 - return R diff --git a/poet/utils/checkmate/plot/graph_plotting.py b/poet/utils/checkmate/plot/graph_plotting.py index 1060c8e..3081d19 100644 --- a/poet/utils/checkmate/plot/graph_plotting.py +++ b/poet/utils/checkmate/plot/graph_plotting.py @@ -5,8 +5,6 @@ from graphviz import Digraph from poet.utils.checkmate.core.dfgraph import DFGraph -from poet.utils.checkmate.core.schedule import ScheduledResult -from poet.utils.checkmate.core.utils.definitions import PathLike def plot_dfgraph(g: DFGraph, directory, format="pdf", quiet=True, name=""): @@ -27,55 +25,3 @@ def plot_dfgraph(g: DFGraph, directory, format="pdf", quiet=True, name=""): except TypeError: dot.render(directory=directory, format=format) print("Saved network architecture plot to directory:", directory) - - -def plot_schedule(sched_result: ScheduledResult, plot_mem_usage=False, save_file: Optional[PathLike] = None, show=False, plt=None): - assert sched_result.feasible - R = sched_result.schedule_aux_data.R - S = sched_result.schedule_aux_data.S - U = None if sched_result.ilp_aux_data is None else sched_result.ilp_aux_data.U - mem_grid = None if sched_result.schedule_aux_data is None else sched_result.schedule_aux_data.mem_grid - _plot_schedule_from_rs(R, S, plot_mem_usage, mem_grid, U, save_file, show, plt) - - -def _plot_schedule_from_rs(R, S, plot_mem_usage=False, mem_grid=None, U=None, save_file: Optional[PathLike] = None, show=False, plt=None): - if plt is None: - import matplotlib.pyplot as plt - - if plot_mem_usage: - assert mem_grid is not None - fig, axs = plt.subplots(1, 4) - vmax = mem_grid - vmax = vmax if U is None else max(vmax, np.max(U)) - - # Plot slow verifier memory usage - axs[2].invert_yaxis() - 
axs[2].pcolormesh(mem_grid, cmap="Greys", vmin=0, vmax=vmax) - axs[2].set_title("Memory usage (verifier)") - - # Plot solver memory usage variables - axs[3].invert_yaxis() - axs[3].set_title("Memory usage (solved)") - if U is not None: - axs[3].pcolormesh(U, cmap="Greys", vmin=0, vmax=vmax) - - fig.set_size_inches(28, 6) - else: - fig, axs = plt.subplots(1, 2) - fig.set_size_inches(18, 6) - - axs[0].invert_yaxis() - axs[0].pcolormesh(R, cmap="Greys", vmin=0, vmax=1) - axs[0].set_title("R") - - axs[1].invert_yaxis() - axs[1].pcolormesh(S, cmap="Greys", vmin=0, vmax=1) - axs[1].set_title("S") - - if show: - plt.show() - if save_file: - path = pathlib.Path(save_file) - path.parents[0].mkdir(parents=True, exist_ok=True) - fig.savefig(path) - plt.close(fig) From adf13bfa04f348c760f6053fe226942bc17e79c1 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 7 Jan 2023 16:53:48 -0800 Subject: [PATCH 09/18] Add POET Server (#4) Co-authored-by: Shishir Patil --- poet-server/Dockerfile | 20 +++++++++ poet-server/README.md | 76 ++++++++++++++++++++++++++++++++++ poet-server/docker-compose.yml | 14 +++++++ poet-server/requirements.txt | 2 + poet-server/server.py | 61 +++++++++++++++++++++++++++ poet/util.py | 68 +++++------------------------- 6 files changed, 184 insertions(+), 57 deletions(-) create mode 100644 poet-server/Dockerfile create mode 100644 poet-server/README.md create mode 100644 poet-server/docker-compose.yml create mode 100644 poet-server/requirements.txt create mode 100644 poet-server/server.py diff --git a/poet-server/Dockerfile b/poet-server/Dockerfile new file mode 100644 index 0000000..f8e7f0e --- /dev/null +++ b/poet-server/Dockerfile @@ -0,0 +1,20 @@ +# TODO: currently the build context uses the entire POET repo since it installs +# the poet-ai package locally; it should be isolated in the future + +FROM python:3.10-slim + +# Install server dependencies +WORKDIR /app/poet-server +ADD poet-server/requirements.txt requirements.txt +RUN pip3 install -r 
requirements.txt + +# Install the poet-ai package +WORKDIR /app +ADD setup.py setup.py +ADD poet poet +RUN pip3 install -e . + +WORKDIR /app/poet-server +ADD poet-server/server.py server.py + +CMD ["python", "server.py"] diff --git a/poet-server/README.md b/poet-server/README.md new file mode 100644 index 0000000..1a964cc --- /dev/null +++ b/poet-server/README.md @@ -0,0 +1,76 @@ +# POET Server Setup + +In this guide, we will show you how to set up the POET ILP server for use locally or as a hosted service. We use this to host our POET server that powers requests from the [POET Demo Colab](https://colab.research.google.com/drive/1iup_edJd9zB1tfVBHXLmkWOT5yoSmXzz?usp=sharing) notebook. You do not need to set-up a POET-server to use POET, but feel free to use it to set-up your hosted service. + +POET's Integer Linear Program (ILP) formulation is compatible with a variety of solvers, including Gurobi and COIN-OR CBC. In this guide, we will demonstrate how to set it up using both of these solvers. + +## Setting Up Gurobi (Optional) + +The ILP solver defaults to using the COIN-OR CBC solver when Gurobi isn't available. However, since Gurobi is much faster, it is recommended to install it where possible. + +### Acquiring a Free Academic Gurobi Web License + +1. Create a free Gurobi account [here](https://pages.gurobi.com/registration). Make sure to specify the Academic user option. +2. Complete the rest of the Gurobi account creation process, which will include creating a password and verifying your email address. +3. Login to the Gurobi [Web License Manager](https://license.gurobi.com/) using your new account. +4. Create and download a new Web License file. This will be a `gurobi.lic` file that you will need in later steps, so keep note of where you save it. + +## Option 1: Running the Server Locally + +1. If using Gurobi, move the `gurobi.lic` file you downloaded in the previous step to your home directory (i.e. to `~/gurobi.lic`). +2. 
Clone this repository by running `git clone https://github.com/ShishirPatil/poet`. +3. Run `pip3 install -e .` in this repository's root directory to install the `poet-ai` package. +4. Run `cd poet-server` to navigate to the `poet-server` directory. +5. Run `pip3 install -r requirements.txt` to install the ILP server dependencies. +6. Finally, run `python3 server.py` to start the server. + - You can optionally run `DEV=1 python3 server.py` to enable reload mode, which will automatically restart the server when you make changes to the code. +7. You can now make requests to the server at `http://localhost/solve`. + +## Option 2: Running the Server Locally within a Docker Container + +We include a Docker image that can be used to run the server. + +Prebuilt Docker images are available at `public.ecr.aws/i5z6k9k2/poet-server` + +You can pull an image and start the server using: + +```bash +docker pull public.ecr.aws/i5z6k9k2/poet-server:latest +docker run -p 80:80 -v ~/gurobi.lic:/opt/gurobi/gurobi.lic public.ecr.aws/i5z6k9k2/poet-server +``` + +Or, you can build the docker container yourself following the steps below. + + +1. Ensure you have [Docker Compose](https://docs.docker.com/compose/install/) installed. +2. Clone this repository by running `git clone https://github.com/ShishirPatil/poet`. +3. If using Gurobi, move the `gurobi.lic` file you downloaded in the previous step to the `poet-server` directory of this repository (i.e. to `poet-server/gurobi.lic`). +4. Run `cd poet-server` to navigate to the `poet-server` directory. +5. Run `docker compose up --build` to build and start the Docker container. +6. You can now make GET requests to the server at `http://localhost/solve` as shown below. + +## Option 3: Hosting POET server on an AWS EC2 Instance + +Ensure that you have moved the `gurobi.lic` file (if you want to use the Gurobi optimizer) you downloaded earlier to the EC2 instance. Ensure that Port 80 is open for ingress traffic. 
+ + +## Making Requests + +To issue requests to the POET server, you can use the following Python code. Here, we use the demo POET-server hosted at IP `54.189.43.62`: + +```python +import requests + +response = requests.get("http://54.189.43.62/solve", { + "model": "linear", + "platform": "m0", + "ram_budget": 90000000, + "runtime_budget": 1.253, + "solver": "gurobi", +}) + +print(response.json()) +``` + + + diff --git a/poet-server/docker-compose.yml b/poet-server/docker-compose.yml new file mode 100644 index 0000000..ca5feb7 --- /dev/null +++ b/poet-server/docker-compose.yml @@ -0,0 +1,14 @@ +version: "3.3" + +services: + server: + image: public.ecr.aws/i5z6k9k2/poet-server:latest + shm_size: 2.5gb + tty: true + ports: + - 80:80 + build: + context: ../ + dockerfile: poet-server/Dockerfile + volumes: + - ${PWD}/gurobi.lic:/opt/gurobi/gurobi.lic diff --git a/poet-server/requirements.txt b/poet-server/requirements.txt new file mode 100644 index 0000000..364e2ee --- /dev/null +++ b/poet-server/requirements.txt @@ -0,0 +1,2 @@ +fastapi +uvicorn[standard] diff --git a/poet-server/server.py b/poet-server/server.py new file mode 100644 index 0000000..22f4d47 --- /dev/null +++ b/poet-server/server.py @@ -0,0 +1,61 @@ +import os +from typing import Literal, Optional + +import uvicorn +from fastapi import FastAPI, HTTPException +from loguru import logger + +from poet import solve + +SOLVE_THREADS = min(4, os.cpu_count()) + +app = FastAPI() + + +@app.get("/solve") +def solve_handler( + model: Literal[ + "linear", + "vgg16", + "vgg16_cifar", + "resnet18", + "resnet50", + "resnet18_cifar", + "bert", + "transformer", + ], + platform: Literal["m0", "a72", "a72nocache", "m4", "jetsontx2"], + ram_budget: float, + runtime_budget: float, + paging: int = 1, + remat: int = 1, + mem_power_scale=1.0, + batch_size=1, + use_actual_gurobi: Optional[bool] = None, + solver: Optional[Literal["gurobi", "cbc"]] = None, + time_limit_s: float = 1e100, + solve_threads: int = SOLVE_THREADS, # 
different default than a direct solve +): + try: + return solve( + model=model, + platform=platform, + ram_budget=ram_budget, + runtime_budget=runtime_budget, + paging=paging, + remat=remat, + mem_power_scale=mem_power_scale, + batch_size=batch_size, + use_actual_gurobi=use_actual_gurobi, + solver=solver, + time_limit_s=time_limit_s, + solve_threads=solve_threads, + ) + except Exception as e: + logger.exception(e) + raise HTTPException(status_code=500, detail=str(e)) + + +if __name__ == "__main__": + logger.info("Initializing an instance of the POET server.") + uvicorn.run("server:app", host="0.0.0.0", port=80, reload=os.environ.get("DEV")) diff --git a/poet/util.py b/poet/util.py index 69807db..9f300e0 100644 --- a/poet/util.py +++ b/poet/util.py @@ -1,6 +1,6 @@ import pickle from pathlib import Path -from typing import List, Optional +from typing import List import matplotlib.pyplot as plt import numpy as np @@ -11,12 +11,11 @@ from poet.architectures.resnet import resnet18, resnet18_cifar, resnet50 from poet.architectures.vgg import vgg16 from poet.chipsets import M4F, MKR1000, JetsonTX2, RPi, RPiNoCache -from poet.power_computation import DNNLayer, GradientLayer, get_net_costs from poet.poet_solver import POETSolution +from poet.power_computation import DNNLayer, GradientLayer, get_net_costs from poet.utils.checkmate.core.dfgraph import DFGraph from poet.utils.checkmate.core.graph_builder import GraphBuilder from poet.utils.checkmate.core.utils.definitions import PathLike -from poet.utils.checkmate.core.utils.scheduler import schedule_from_rs from poet.utils.checkmate.plot.graph_plotting import plot_dfgraph @@ -115,12 +114,14 @@ def plot_network( def print_result(result: dict): solution: POETSolution = result["solution"] if solution.feasible: - solution_msg = "successfully found an optimal solution" if solution.finished else "found a feasible solution" + solution_msg = "successfully found an optimal solution" if solution.optimal else "found a feasible solution" 
print( - f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']:.5f} J of CPU power and {result['total_power_cost_page']:.5f} J of memory paging power" + f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']} J of CPU power and {result['total_power_cost_page']} J of memory paging power" ) - if not solution.finished: - print("This solution is not guaranteed to be optimal - you can try increasing the time limit to find an optimal solution") + if not solution.optimal: + print( + "This solution is not guaranteed to be optimal - you can try increasing the solve time [time_limit_s] to find an optimal solution" + ) plt.matshow(solution.R) plt.title("R") @@ -133,54 +134,7 @@ def print_result(result: dict): plt.matshow(solution.SSd) plt.title("SSd") plt.show() - - plot_schedule(solution.R, solution.SRam, show=True, plot_mem_usage=True) - elif solution.finished: - print("POET finished solving and determined that no feasible solution exists") - else: - print("POET failed to find a feasible solution within the provided time limit") - - -def plot_schedule(R, S, plot_mem_usage=False, mem_grid=None, U=None, save_file: Optional[PathLike] = None, show=False): - x, y = get_chipset_and_net("m0", "linear", 1, 1) - g, *_ = make_dfgraph_costs(y, x) - _, scheduler_aux_data = schedule_from_rs(g, np.array(R), np.array(S)) - mem_grid = scheduler_aux_data.mem_grid - - if plot_mem_usage: - # assert mem_grid is not None - fig, axs = plt.subplots(1, 4) - vmax = mem_grid - vmax = vmax if U is None else max(vmax, np.max(U)) - - # Plot slow verifier memory usage - axs[2].invert_yaxis() - axs[2].pcolormesh(mem_grid, cmap="Greys", vmin=0, vmax=vmax[0][0]) - axs[2].set_title("Memory usage (verifier)") - - # Plot solver memory usage variables - axs[3].invert_yaxis() - axs[3].set_title("Memory usage (solved)") - if U is not None: - axs[3].pcolormesh(U, 
cmap="Greys", vmin=0, vmax=vmax) - - fig.set_size_inches(28, 6) else: - fig, axs = plt.subplots(1, 2) - fig.set_size_inches(18, 6) - - axs[0].invert_yaxis() - axs[0].pcolormesh(R, cmap="Greys", vmin=0, vmax=1) - axs[0].set_title("R") - - axs[1].invert_yaxis() - axs[1].pcolormesh(S, cmap="Greys", vmin=0, vmax=1) - axs[1].set_title("S") - - if show: - plt.show() - if save_file: - path = Path(save_file) - path.parents[0].mkdir(parents=True, exist_ok=True) - fig.savefig(path) - plt.close(fig) + print( + "POET failed to find a feasible solution within the provided time limit. \n Either a) increase the memory and training time budgets, and/or b) increase the solve time [total_power_cost_page]" + ) From 164767ba70d22592c6851827d830aedfc8f6ce98 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 7 Jan 2023 23:03:47 -0800 Subject: [PATCH 10/18] Fix invalid reference in print_result utility --- poet/util.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/poet/util.py b/poet/util.py index 9f300e0..e1c9276 100644 --- a/poet/util.py +++ b/poet/util.py @@ -114,14 +114,12 @@ def plot_network( def print_result(result: dict): solution: POETSolution = result["solution"] if solution.feasible: - solution_msg = "successfully found an optimal solution" if solution.optimal else "found a feasible solution" + solution_msg = "successfully found an optimal solution" if solution.finished else "found a feasible solution" print( - f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']} J of CPU power and {result['total_power_cost_page']} J of memory paging power" + f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']:.5f} J of CPU power and {result['total_power_cost_page']:.5f} J of memory paging power" ) - if not solution.optimal: - print( - "This solution is not guaranteed to be optimal - you can try increasing the 
solve time [time_limit_s] to find an optimal solution" - ) + if not solution.finished: + print("This solution is not guaranteed to be optimal - you can try increasing the time limit to find an optimal solution") plt.matshow(solution.R) plt.title("R") From 187d9ed7b385d4384f4d69965aa7a08aaa1ce655 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 14 Jan 2023 03:05:56 -0800 Subject: [PATCH 11/18] Remove .editorconfig --- .editorconfig | 12 ------------ .gitignore | 1 + 2 files changed, 1 insertion(+), 12 deletions(-) delete mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index f72537c..0000000 --- a/.editorconfig +++ /dev/null @@ -1,12 +0,0 @@ -# This file contains contains settings which ensure consistent formatting for files -# including indentation and newlines across various editors - -root = true - -[*] -charset = utf-8 -end_of_line = lf -indent_size = 4 -indent_style = space -insert_final_newline = true -trim_trailing_whitespace = true diff --git a/.gitignore b/.gitignore index 49bc2fd..3d7b707 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ dist *.log **/*.lic .vscode +.editorconfig From ca7e8e632558143ce0ea8c7da804925b79ea7674 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 14 Jan 2023 03:06:25 -0800 Subject: [PATCH 12/18] Add POETResult instead of dict result --- poet/poet_solver_gurobi.py | 2 +- poet/solve.py | 13 ++++++------- poet/util.py | 18 +++++++++++++++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/poet/poet_solver_gurobi.py b/poet/poet_solver_gurobi.py index 796d4ad..954b9fd 100644 --- a/poet/poet_solver_gurobi.py +++ b/poet/poet_solver_gurobi.py @@ -241,7 +241,7 @@ def solve( Min=self.get_result(self.MIn, (self.T, self.T)), Mout=self.get_result(self.MOut, (self.T, self.T)), FreeE=self.get_result(self.Free_E, (self.T, len(self.g.edge_list))), - U=self.get_result(self.U, (self.T, self.T), dtype=np.float), + U=self.get_result(self.U, (self.T, self.T), 
dtype=float), finished=self.m.status in [GRB.OPTIMAL, GRB.INFEASIBLE], feasible=is_feasible, solve_time_s=solve_time, diff --git a/poet/solve.py b/poet/solve.py index 4627d3e..e65e24b 100644 --- a/poet/solve.py +++ b/poet/solve.py @@ -1,13 +1,13 @@ from argparse import ArgumentParser +from dataclasses import dataclass from typing import Literal, Optional import numpy as np from poet import solve -from poet.poet_solver import POETSolver +from poet.poet_solver import POETSolution, POETSolver from poet.poet_solver_gurobi import POETSolverGurobi -from poet.util import get_chipset_and_net, make_dfgraph_costs, plot_dfgraph, print_result - +from poet.util import get_chipset_and_net, make_dfgraph_costs, plot_dfgraph, print_result, POETResult def solve( model: Literal[ @@ -122,17 +122,16 @@ def solve( else: total_power_cost_page, total_power_cost_cpu, total_runtime = None, None, None - result = dict( - ram_budget_bytes=ram_budget, + result = POETResult( + ram_budget=ram_budget, runtime_budget_ms=runtime_budget_ms, paging=paging, remat=remat, - integral=True, - solution=solution, total_power_cost_page=total_power_cost_page, total_power_cost_cpu=total_power_cost_cpu, total_runtime=total_runtime, feasible=solution.feasible, + solution=solution ) return result diff --git a/poet/util.py b/poet/util.py index e1c9276..a8318de 100644 --- a/poet/util.py +++ b/poet/util.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass import pickle from pathlib import Path from typing import List @@ -18,6 +19,17 @@ from poet.utils.checkmate.core.utils.definitions import PathLike from poet.utils.checkmate.plot.graph_plotting import plot_dfgraph +@dataclass +class POETResult: + ram_budget: float + runtime_budget_ms: float + paging: bool + remat: bool + total_power_cost_page: float + total_power_cost_cpu: float + total_runtime: float + feasible: bool + solution: POETSolution def save_network_repr(net: List[DNNLayer], readable_path: PathLike = None, pickle_path: PathLike = None): if readable_path is 
not None: @@ -111,12 +123,12 @@ def plot_network( plot_dfgraph(g, directory, format, quiet, name) -def print_result(result: dict): - solution: POETSolution = result["solution"] +def print_result(result: POETResult): + solution = result.solution if solution.feasible: solution_msg = "successfully found an optimal solution" if solution.finished else "found a feasible solution" print( - f"POET {solution_msg} with a memory budget of {result['ram_budget_bytes']} bytes that consumes {result['total_power_cost_cpu']:.5f} J of CPU power and {result['total_power_cost_page']:.5f} J of memory paging power" + f"POET {solution_msg} with a memory budget of {result.ram_budget} bytes that consumes {result.total_power_cost_cpu:.5f} J of CPU power and {result.total_power_cost_page:.5f} J of memory paging power" ) if not solution.finished: print("This solution is not guaranteed to be optimal - you can try increasing the time limit to find an optimal solution") From 51139cd00e55b93697c347e36cec82442784b0a4 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 14 Jan 2023 03:06:45 -0800 Subject: [PATCH 13/18] Setup GitHub Actions and pytest --- .github/workflows/main.yml | 25 +++++++++++++++++++++++++ test/test_cli.py | 7 +++++++ 2 files changed, 32 insertions(+) create mode 100644 .github/workflows/main.yml create mode 100644 test/test_cli.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..2398205 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,25 @@ +name: Python package + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: 3.10 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest black + pip install -e . + - name: Lint with Black + run: | + black --check --line-length 140 . 
+ - name: Test with pytest + run: | + pytest diff --git a/test/test_cli.py b/test/test_cli.py new file mode 100644 index 0000000..5152745 --- /dev/null +++ b/test/test_cli.py @@ -0,0 +1,7 @@ +import re +import subprocess + +def test_readme_example(): + command = "python poet/solve.py --model resnet18_cifar --platform a72 --ram-budget 3000000 --runtime-budget 7.6" + output = subprocess.check_output(command, shell=True).decode("utf-8") + assert re.test(r"POET successfully found an optimal solution with a memory budget of 3000000 bytes that consumes 7.8\d+ J of CPU power and 0.001\d+ J of memory paging power", output) From f660c82f6e45809ea187212995993853ed62c5ba Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 14 Jan 2023 03:08:58 -0800 Subject: [PATCH 14/18] Fix Python version in GitHub Actions --- .github/workflows/main.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2398205..88bdbd9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,9 +1,10 @@ -name: Python package +name: POET on: [push] jobs: build: + name: Build, lint, and test runs-on: ubuntu-latest steps: @@ -11,7 +12,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: "3.10" - name: Install dependencies run: | python -m pip install --upgrade pip From ea93b2b7b231bd19c9adcdbe97de5ee571f40307 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Sat, 14 Jan 2023 03:10:20 -0800 Subject: [PATCH 15/18] Fix formatting with Black --- .github/workflows/main.yml | 2 +- poet/solve.py | 3 ++- poet/util.py | 2 ++ test/test_cli.py | 6 +++++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 88bdbd9..10b8f41 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,7 @@ jobs: python -m pip install --upgrade pip pip install pytest black pip 
install -e . - - name: Lint with Black + - name: Test formatting with Black run: | black --check --line-length 140 . - name: Test with pytest diff --git a/poet/solve.py b/poet/solve.py index e65e24b..98e77fc 100644 --- a/poet/solve.py +++ b/poet/solve.py @@ -9,6 +9,7 @@ from poet.poet_solver_gurobi import POETSolverGurobi from poet.util import get_chipset_and_net, make_dfgraph_costs, plot_dfgraph, print_result, POETResult + def solve( model: Literal[ "linear", @@ -131,7 +132,7 @@ def solve( total_power_cost_cpu=total_power_cost_cpu, total_runtime=total_runtime, feasible=solution.feasible, - solution=solution + solution=solution, ) return result diff --git a/poet/util.py b/poet/util.py index a8318de..0987b1f 100644 --- a/poet/util.py +++ b/poet/util.py @@ -19,6 +19,7 @@ from poet.utils.checkmate.core.utils.definitions import PathLike from poet.utils.checkmate.plot.graph_plotting import plot_dfgraph + @dataclass class POETResult: ram_budget: float @@ -31,6 +32,7 @@ class POETResult: feasible: bool solution: POETSolution + def save_network_repr(net: List[DNNLayer], readable_path: PathLike = None, pickle_path: PathLike = None): if readable_path is not None: with Path(readable_path).open("w") as f: diff --git a/test/test_cli.py b/test/test_cli.py index 5152745..e104542 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -1,7 +1,11 @@ import re import subprocess + def test_readme_example(): command = "python poet/solve.py --model resnet18_cifar --platform a72 --ram-budget 3000000 --runtime-budget 7.6" output = subprocess.check_output(command, shell=True).decode("utf-8") - assert re.test(r"POET successfully found an optimal solution with a memory budget of 3000000 bytes that consumes 7.8\d+ J of CPU power and 0.001\d+ J of memory paging power", output) + assert re.test( + r"POET successfully found an optimal solution with a memory budget of 3000000 bytes that consumes 7.8\d+ J of CPU power and 0.001\d+ J of memory paging power", + output, + ) From 
63f457bf695d4f20f1755656ad8a3f45610c8fc2 Mon Sep 17 00:00:00 2001 From: Shishir Patil Date: Sun, 15 Jan 2023 00:00:26 +0000 Subject: [PATCH 16/18] Removing test since till gurobi set-up as github secret --- .github/workflows/main.yml | 6 +++--- README.md | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 10b8f41..e277b9a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,6 +21,6 @@ jobs: - name: Test formatting with Black run: | black --check --line-length 140 . - - name: Test with pytest - run: | - pytest + # - name: Test with pytest + # run: | + # pytest diff --git a/README.md b/README.md index 65f42b9..449628e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # POET -By Shishir G. Patil, Paras Jain, Prabal Dutta, Ion Stoica, and Joseph E. Gonzalez ([Project Website](https://shishirpatil.github.io/poet/)) +By Shishir G. Patil, Paras Jain, Prabal Dutta, Ion Stoica, and Joseph E. Gonzalez ([Project Website](https://poet.cs.berkeley.edu/)) -![](assets/img/logo.png) +![](https://github.com/ShishirPatil/poet/blob/gh-pages/assets/img/logo.png) _See the paper!_ [https://arxiv.org/abs/2207.07697](https://arxiv.org/abs/2207.07697) @@ -13,14 +13,16 @@ ResNets on smartphones and tiny ARM Cortex-M devices :muscle: Reach out to us at [sgp@berkeley.edu](mailto:sgp@berkeley.edu), if you have large models that you are trying to train - be it on GPUs, or your commodity edge devices such as laptops, smartphones, raspberry-pis, ARM Cortex M and A class, fitbits, etc. -## Get Started +## Get Started [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1iup_edJd9zB1tfVBHXLmkWOT5yoSmXzz?usp=sharing) ### Installation -You can install POET with: +Clone the repository and install POET: ```bash -python setup.py install +git clone https://github.com/ShishirPatil/poet.git +cd poet/ +pip install -e . 
``` ### Setting Up Gurobi (Recommended) From e90dd61c5b44c46a8a828f5d98bd4356fe375bad Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Wed, 25 Jan 2023 15:20:41 -0800 Subject: [PATCH 17/18] Initial Pareto curve implementation --- poet/pareto.py | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++ poet/solve.py | 8 ++--- poet/util.py | 26 ++++++++++++--- 3 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 poet/pareto.py diff --git a/poet/pareto.py b/poet/pareto.py new file mode 100644 index 0000000..eacf66d --- /dev/null +++ b/poet/pareto.py @@ -0,0 +1,90 @@ +from poet import solve +from typing import Literal, Optional +from poet.util import get_chipset_and_net, make_dfgraph_costs, get_net_costs, print_result +import numpy as np +import os +from concurrent.futures import ProcessPoolExecutor +import matplotlib.pyplot as plt + +def simple_solve( + params +): + return solve( + **params + ) + + +def pareto( + model: Literal[ + "linear", + "vgg16", + "vgg16_cifar", + "resnet18", + "resnet50", + "resnet18_cifar", + "bert", + "transformer", + ], + platform: Literal["m0", "a72", "a72nocache", "m4", "jetsontx2"], + # ram_budget: float, + runtime_budget: float = 1.4, + mem_power_scale=1.0, + batch_size=1, + ram_budget_samples: int = 100, + use_actual_gurobi: Optional[bool] = True, + solver: Optional[Literal["gurobi", "cbc"]] = None, + time_limit_s: float = 1e100, + solve_threads: int = 4, + total_threads: int = os.cpu_count(), +): + plt.ion() + + chipset, net, ram_budget_start, ram_budget_end = get_chipset_and_net( + platform=platform, + model=model, + batch_size=batch_size, + mem_power_scale=mem_power_scale, + ) + + base_memory = max(get_net_costs(net=net, device=chipset)["memory_bytes"]) + print(base_memory, ram_budget_start, ram_budget_end, ram_budget_start / base_memory, ram_budget_end / base_memory) + + ram_budget_range = np.linspace(0, ram_budget_end, ram_budget_samples) + + g, *_ = make_dfgraph_costs(net=net, device=chipset) + total_runtime = 
sum(g.cost_cpu.values()) + total_ram = sum(g.cost_ram[i] for i in g.vfwd) + print(f"Total runtime of graph (forward + backward) = {total_runtime}") + print(f"Total RAM consumption of forward pass = {total_ram}") + print(f"### --- ### Total RAM consumption of forward pass = {total_ram}") + print(total_threads // solve_threads) + + with ProcessPoolExecutor(max_workers=total_threads // solve_threads) as executor: + for result in executor.map( + simple_solve, + [ + dict(model=model, + platform=platform, + ram_budget=ram_budget, + runtime_budget=runtime_budget, + mem_power_scale=mem_power_scale, + batch_size=batch_size, + use_actual_gurobi=use_actual_gurobi, + solver=solver, + time_limit_s=time_limit_s, + solve_threads=solve_threads) + for ram_budget in ram_budget_range + ], + ): + print_result(result) + print(result.total_power_cost_cpu, result.total_power_cost_page, result.ram_budget) + plt.plot(result.ram_budget, -1 if result.total_power_cost_cpu is None else result.total_power_cost_cpu + result.total_power_cost_page, "r.") + plt.draw() + plt.pause(0.1) + + print("Done!") + plt.show(block=True) + + +if __name__ == "__main__": + pareto(model="vgg16", platform="m4", runtime_budget=1.1, time_limit_s=120) diff --git a/poet/solve.py b/poet/solve.py index 98e77fc..f45b306 100644 --- a/poet/solve.py +++ b/poet/solve.py @@ -51,7 +51,7 @@ def solve( :param time_limit_s: The time limit for solving in seconds. :param solve_threads: The number of threads to use for solving. 
""" - chipset, net = get_chipset_and_net( + chipset, net, *_ = get_chipset_and_net( platform=platform, model=model, batch_size=batch_size, @@ -81,7 +81,7 @@ def solve( runtime_budget_ms = runtime_budget * total_runtime if use_actual_gurobi: - solver = POETSolverGurobi( + ilp_solver = POETSolverGurobi( g, cpu_power_cost_vec_joule, pagein_power_cost_vec_joule, @@ -94,7 +94,7 @@ def solve( solve_threads=solve_threads, ) else: - solver = POETSolver( + ilp_solver = POETSolver( g, cpu_power_cost_vec_joule=cpu_power_cost_vec_joule, pagein_power_cost_vec_joule=pagein_power_cost_vec_joule, @@ -108,7 +108,7 @@ def solve( solver=solver, ) - solution = solver.solve() + solution = ilp_solver.solve() if solution is not None and solution.feasible: cpu_cost_vec = np.asarray([g.cost_cpu[i] for i in range(g.size)])[np.newaxis, :].T diff --git a/poet/util.py b/poet/util.py index 0987b1f..019788c 100644 --- a/poet/util.py +++ b/poet/util.py @@ -91,36 +91,54 @@ def get_chipset_and_net(platform: str, model: str, batch_size: int, mem_power_sc elif platform == "jetsontx2": chipset = JetsonTX2 else: - raise NotImplementedError() + raise NotImplementedError(f"Platform {platform} not implemented.") chipset["MEMORY_POWER"] *= mem_power_scale if model == "linear": net = make_linear_network() + # TODO: these were randomly picked + ram_budget_start = 1.0e07 + ram_budget_end = 1.0e08 elif model == "vgg16": net = vgg16(batch_size) + ram_budget_start = 2.57e07 + ram_budget_end = 1.15e08 elif model == "vgg16_cifar": net = vgg16(batch_size, 10, (3, 32, 32)) + ram_budget_start = 2.57e07 / 49 + ram_budget_end = 1.15e08 / 49 elif model == "resnet18": net = resnet18(batch_size) + ram_budget_start = 6.42e06 + ram_budget_end = 2.85e07 elif model == "resnet50": net = resnet50(batch_size) + ram_budget_start = 6.97e06 + ram_budget_end = 1.27e08 elif model == "resnet18_cifar": net = resnet18_cifar(batch_size, 10, (3, 32, 32)) + ram_budget_start = 196608 + ram_budget_end = 2339408 elif model == "bert": net = 
BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=12) + # TODO: this is very broken + ram_budget_start = 1e6 + ram_budget_end = 1e9 elif model == "transformer": net = BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=1) + ram_budget_start = 3e7 # TODO: this was changed + ram_budget_end = 4e7 else: - raise NotImplementedError() + raise NotImplementedError(f"Model {model} not implemented.") - return chipset, net + return chipset, net, ram_budget_start, ram_budget_end def plot_network( platform: str, model: str, directory: str, batch_size: int = 1, mem_power_scale: float = 1.0, format="pdf", quiet=True, name="" ): - chipset, net = get_chipset_and_net(platform, model, batch_size, mem_power_scale) + chipset, net, *_ = get_chipset_and_net(platform, model, batch_size, mem_power_scale) g, *_ = make_dfgraph_costs(net, chipset) plot_dfgraph(g, directory, format, quiet, name) From ad349912ba2d62ef5150efcc521b9edc35b37685 Mon Sep 17 00:00:00 2001 From: Anish Shanbhag Date: Mon, 30 Jan 2023 22:22:15 -0800 Subject: [PATCH 18/18] Partially working Pareto plot --- poet/pareto.py | 41 ++++++++++++++++++++++++++--------------- poet/util.py | 4 ++-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/poet/pareto.py b/poet/pareto.py index 22a09ac..45304e2 100644 --- a/poet/pareto.py +++ b/poet/pareto.py @@ -1,13 +1,15 @@ -from poet import solve -from typing import Literal, Optional -from poet.util import get_chipset_and_net, make_dfgraph_costs, get_net_costs, print_result -import numpy as np import os from concurrent.futures import ProcessPoolExecutor +from typing import Literal + import matplotlib.pyplot as plt +import numpy as np +from poet import solve +from poet.util import get_chipset_and_net, get_net_costs, make_dfgraph_costs -def simple_solve(params): + +def solve_wrapper(params): return solve(**params) @@ -27,12 +29,12 @@ def pareto( runtime_budget: float = 1.4, mem_power_scale=1.0, batch_size=1, - 
ram_budget_samples: int = 100, - use_actual_gurobi: Optional[bool] = True, - solver: Optional[Literal["gurobi", "cbc"]] = None, + ram_budget_samples: int = 20, + solver: Literal["gurobipy", "pulp-gurobi", "pulp-cbc"] = "gurobipy", time_limit_s: float = 1e100, solve_threads: int = 4, total_threads: int = os.cpu_count(), + filename: str = "pareto.png", ): plt.ion() @@ -44,9 +46,15 @@ def pareto( ) base_memory = max(get_net_costs(net=net, device=chipset)["memory_bytes"]) - print(base_memory, ram_budget_start, ram_budget_end, ram_budget_start / base_memory, ram_budget_end / base_memory) + print( + base_memory, + ram_budget_start, + ram_budget_end, + ram_budget_start / base_memory, + ram_budget_end / base_memory, + ) - ram_budget_range = np.linspace(0, ram_budget_end, ram_budget_samples) + ram_budget_range = np.linspace(ram_budget_start, ram_budget_end, ram_budget_samples) g, *_ = make_dfgraph_costs(net=net, device=chipset) total_runtime = sum(g.cost_cpu.values()) @@ -58,7 +66,7 @@ def pareto( with ProcessPoolExecutor(max_workers=total_threads // solve_threads) as executor: for result in executor.map( - simple_solve, + solve_wrapper, [ dict( model=model, @@ -67,7 +75,6 @@ def pareto( runtime_budget=runtime_budget, mem_power_scale=mem_power_scale, batch_size=batch_size, - use_actual_gurobi=use_actual_gurobi, solver=solver, time_limit_s=time_limit_s, solve_threads=solve_threads, @@ -75,8 +82,11 @@ def pareto( for ram_budget in ram_budget_range ], ): - print_result(result) - print(result.total_power_cost_cpu, result.total_power_cost_page, result.ram_budget) + print( + result.total_power_cost_cpu, + result.total_power_cost_page, + result.ram_budget, + ) plt.plot( result.ram_budget, -1 if result.total_power_cost_cpu is None else result.total_power_cost_cpu + result.total_power_cost_page, @@ -86,8 +96,9 @@ def pareto( plt.pause(0.1) print("Done!") + plt.savefig(filename) plt.show(block=True) if __name__ == "__main__": - pareto(model="vgg16", platform="m4", runtime_budget=1.1, 
time_limit_s=120) + pareto(model="resnet18_cifar", platform="m4", runtime_budget=1.2, time_limit_s=600) diff --git a/poet/util.py b/poet/util.py index 7e181eb..a83d6e4 100644 --- a/poet/util.py +++ b/poet/util.py @@ -132,8 +132,8 @@ def get_chipset_and_net(platform: str, model: str, batch_size: int, mem_power_sc ram_budget_end = 1e9 elif model == "transformer": net = BERTBase(SEQ_LEN=512, HIDDEN_DIM=768, I=64, HEADS=12, NUM_TRANSFORMER_BLOCKS=1) - ram_budget_start = 3e7 # TODO: this was changed - ram_budget_end = 4e7 + ram_budget_start = 1e5 + ram_budget_end = 7e7 else: raise NotImplementedError(f"Model {model} not implemented.")