diff --git a/spar/data/distributions/convert.py b/spar/data/distributions/convert.py
new file mode 100755
index 0000000..61ce8d8
--- /dev/null
+++ b/spar/data/distributions/convert.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3.7
+"""Convert pickled ``*_dist.pkl`` distributions into ``*_hist.pkl`` files.
+
+Each input pickle holds either one distribution object or a dict of them.
+Only the ``_histogram`` attribute of each object is kept and re-pickled;
+``spar.utils.draw`` rebuilds the distributions from these histograms.
+"""
+import pathlib
+import pickle
+
+
+def extract_histograms(dist):
+    """Return ``dist._histogram``, or a dict of them when ``dist`` is a dict."""
+    if isinstance(dist, dict):
+        return {key: value._histogram for key, value in dist.items()}
+    return dist._histogram
+
+
+def main(directory="."):
+    """Convert every ``*_dist.pkl`` file found directly in ``directory``."""
+    for pkl_file in sorted(pathlib.Path(directory).glob("*_dist.pkl")):
+        print(f"Read data from {pkl_file}")
+        # NOTE: unpickling can execute arbitrary code; use trusted files only.
+        with open(pkl_file, 'rb') as f:
+            dist = pickle.load(f)
+        hist = extract_histograms(dist)
+        # A lone histogram counts as one; len() of it would measure the
+        # histogram object itself rather than the number of histograms.
+        num_hist = len(hist) if isinstance(dist, dict) else 1
+        print(f"Extracted histograms: {num_hist}")
+        # Rename within the file name only (never the directory), replacing
+        # every 'dist' the same way spar.utils.draw derives the name to load.
+        hist_file = pkl_file.with_name(pkl_file.name.replace('dist', 'hist'))
+        print(f"dump histograms to {hist_file}")
+        with open(hist_file, 'wb') as f:
+            # fix_imports only matters for protocols below 3, so it is omitted.
+            pickle.dump(hist, f, protocol=4)
+    print("Finished")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/spar/data/distributions/cpl_hist.pkl b/spar/data/distributions/cpl_hist.pkl
new file mode 100644
index 0000000..cf9e4a6
Binary files /dev/null and b/spar/data/distributions/cpl_hist.pkl differ
diff --git a/spar/data/distributions/instance_cpu_hist.pkl b/spar/data/distributions/instance_cpu_hist.pkl
new file mode 100644
index 0000000..a48e2b4
Binary files /dev/null and b/spar/data/distributions/instance_cpu_hist.pkl differ
diff --git a/spar/data/distributions/instance_duration_hist.pkl b/spar/data/distributions/instance_duration_hist.pkl
new file mode 100644
index 0000000..410bef8
Binary files /dev/null and b/spar/data/distributions/instance_duration_hist.pkl differ
diff --git a/spar/data/distributions/instance_mem_hist.pkl b/spar/data/distributions/instance_mem_hist.pkl
new file mode 100644
index 0000000..3cd5030
Binary files /dev/null and b/spar/data/distributions/instance_mem_hist.pkl differ
diff --git a/spar/data/distributions/instance_num_hist.pkl b/spar/data/distributions/instance_num_hist.pkl
new file mode 100644
index 0000000..9d25ef2
Binary files /dev/null and b/spar/data/distributions/instance_num_hist.pkl differ
diff --git a/spar/data/distributions/job_interval_hist.pkl
b/spar/data/distributions/job_interval_hist.pkl new file mode 100644 index 0000000..df292b4 Binary files /dev/null and b/spar/data/distributions/job_interval_hist.pkl differ diff --git a/spar/data/distributions/level_hist.pkl b/spar/data/distributions/level_hist.pkl new file mode 100644 index 0000000..529dda8 Binary files /dev/null and b/spar/data/distributions/level_hist.pkl differ diff --git a/spar/data/distributions/task_cpu_hist.pkl b/spar/data/distributions/task_cpu_hist.pkl new file mode 100644 index 0000000..382cc50 Binary files /dev/null and b/spar/data/distributions/task_cpu_hist.pkl differ diff --git a/spar/data/distributions/task_duration_hist.pkl b/spar/data/distributions/task_duration_hist.pkl new file mode 100644 index 0000000..181c107 Binary files /dev/null and b/spar/data/distributions/task_duration_hist.pkl differ diff --git a/spar/data/distributions/task_mem_hist.pkl b/spar/data/distributions/task_mem_hist.pkl new file mode 100644 index 0000000..ceb1cf6 Binary files /dev/null and b/spar/data/distributions/task_mem_hist.pkl differ diff --git a/spar/data/distributions/task_num_hist.pkl b/spar/data/distributions/task_num_hist.pkl new file mode 100644 index 0000000..afd1909 Binary files /dev/null and b/spar/data/distributions/task_num_hist.pkl differ diff --git a/spar/utils.py b/spar/utils.py index 163e64f..5638268 100644 --- a/spar/utils.py +++ b/spar/utils.py @@ -6,18 +6,26 @@ from collections import defaultdict import numpy as np - +from scipy.stats import rv_histogram DATA_DIR = Path(__file__).resolve().parents[0] / 'data' / 'distributions' DIST_CACHE = {} SAMPLE_CACHE = defaultdict(list) -def draw(dist_name, num=1, path=[], output_integer=True): +def draw(dist_name, num=1, path=tuple(), output_integer=True, from_hist=True): """Draw random samples from a given distribution.""" if dist_name not in DIST_CACHE: - with (DATA_DIR / dist_name).open('rb') as f: - DIST_CACHE[dist_name] = pickle.load(f) + if from_hist: + with (DATA_DIR / 
dist_name.replace('dist', 'hist')).open('rb') as f: + hist = pickle.load(f) + if isinstance(hist, dict): + DIST_CACHE[dist_name] = {k: rv_histogram(histogram=v, density=False) for k, v in hist.items()} + else: + DIST_CACHE[dist_name] = rv_histogram(histogram=hist, density=False) + else: + with (DATA_DIR / dist_name).open('rb') as f: + DIST_CACHE[dist_name] = pickle.load(f) dist = DIST_CACHE[dist_name] for p in path: