From 15b717402351db4f3b9a02922d0a2cf071d8d8a3 Mon Sep 17 00:00:00 2001 From: yolo2themoon Date: Tue, 26 Oct 2021 10:04:43 +0800 Subject: [PATCH 1/2] [refactor] [misc] Refactoring benchmark code for performance monitoring --- benchmarks/misc/membound.py | 119 +++++++++++++----------------- benchmarks/misc/membound_cases.py | 8 +- benchmarks/misc/run.py | 86 +++++++++++++-------- benchmarks/misc/utils.py | 31 +++++--- 4 files changed, 133 insertions(+), 111 deletions(-) diff --git a/benchmarks/misc/membound.py b/benchmarks/misc/membound.py index c2aedab9d1432..d408e69f6aca9 100644 --- a/benchmarks/misc/membound.py +++ b/benchmarks/misc/membound.py @@ -1,107 +1,94 @@ import time from membound_cases import fill, reduction, saxpy -from utils import * +from utils import (arch_name, dtype2str, geometric_mean, kibibyte, + md_table_header, size2str) import taichi as ti -test_cases = [fill, saxpy, reduction] -test_archs = [ti.cuda] -test_dtype = [ti.i32, ti.i64, ti.f32, ti.f64] -test_dsize = [(4**i) * kibibyte for i in range(1, 10)] #[4KB,16KB...256MB] -test_repeat = 10 -results_evaluation = [geometric_mean] +class SuiteInfo: + cases = [fill, saxpy, reduction] + supported_archs = [ti.cpu, ti.cuda] + dtype = [ti.i32, ti.i64, ti.f32, ti.f64] + dsize = [(4**i) * kibibyte for i in range(1, 10)] #[4KB,16KB...256MB] + repeat = 10 + evaluator = [geometric_mean] -class BenchmarkResult: - def __init__(self, name, arch, dtype, dsize, results_evaluation): + +class CaseResult: + def __init__(self, name, arch, dtype, dsize, evaluator): self.test_name = name self.test_arch = arch self.data_type = dtype - self.data_size = dsize - self.min_time_in_us = [] - self.results_evaluation = results_evaluation + self.data_size = dsize #list + self.min_time_in_us = [] #list + self.evaluator = evaluator - def time2mdtableline(self): + def result_to_markdown(self): string = '|' + self.test_name + '.' + dtype2str[self.data_type] + '|' string += ''.join( str(round(time, 4)) + '|' for time in self.min_time_in_us) string += ''.join( str(round(item(self.min_time_in_us), 4)) + '|' - for item in self.results_evaluation) + for item in self.evaluator) return string -class BenchmarkImpl: - def __init__(self, func, archs, data_type, data_size): +class CaseImpl: + def __init__(self, func, arch, data_type, data_size): self.func = func self.name = func.__name__ self.env = None self.device = None - self.archs = archs + self.arch = arch self.data_type = data_type self.data_size = data_size - self.benchmark_results = [] + self.case_results = [] def run(self): - for arch in self.archs: - for dtype in self.data_type: - ti.init(kernel_profiler=True, arch=arch) - print("TestCase[%s.%s.%s]" % - (self.func.__name__, ti.core.arch_name(arch), - dtype2str[dtype])) - result = BenchmarkResult(self.name, arch, dtype, - self.data_size, results_evaluation) - for size in self.data_size: - print("data_size = %s" % (size2str(size))) - result.min_time_in_us.append( - self.func(arch, dtype, size, test_repeat)) - time.sleep(0.2) - self.benchmark_results.append(result) - - def print(self): - i = 0 - for arch in self.archs: - for dtype in self.data_type: - for idx in range(len(self.data_size)): - print( - " test_case:[%s] arch:[%s] dtype:[%s] dsize:[%7s] >>> time:[%4.4f]" - % - (self.name, ti.core.arch_name(arch), dtype2str[dtype], - size2str(self.benchmark_results[i].data_size[idx]), - self.benchmark_results[i].min_time_in_us[idx])) - i = i + 1 - - def save2markdown(self, arch): + for dtype in self.data_type: + ti.init(kernel_profiler=True, arch=self.arch) + print("TestCase[%s.%s.%s]" % + (self.func.__name__, arch_name(self.arch), dtype2str[dtype])) + result = CaseResult(self.name, self.arch, dtype, self.data_size, + SuiteInfo.evaluator) + for size in self.data_size: + print("data_size = %s" % (size2str(size))) + result.min_time_in_us.append( + self.func(self.arch, dtype, size, SuiteInfo.repeat)) + time.sleep(0.2) + self.case_results.append(result) + + def to_markdown(self): header = '|kernel elapsed time(ms)' + ''.join( - '|' for i in range(len(self.data_size) + len(results_evaluation))) + '|' for i in range(len(self.data_size) + len(SuiteInfo.evaluator))) lines = [header] - for result in self.benchmark_results: - if (result.test_arch == arch): - lines.append(result.time2mdtableline()) + for result in self.case_results: + lines.append(result.result_to_markdown()) return lines -class Membound: - benchmark_imps = [] +class MemoryBound: + suite_name = 'memorybound' + supported_archs = SuiteInfo.supported_archs - def __init__(self): - for case in test_cases: - self.benchmark_imps.append( - BenchmarkImpl(case, test_archs, test_dtype, test_dsize)) + def __init__(self, arch): + self.arch = arch + self.cases_impl = [] + for case in SuiteInfo.cases: + self.cases_impl.append( + CaseImpl(case, arch, SuiteInfo.dtype, SuiteInfo.dsize)) def run(self): - for case in self.benchmark_imps: + for case in self.cases_impl: case.run() - def mdlines(self, arch): + def get_markdown_str(self): lines = [] - lines += md_table_header(self.__class__.__name__, arch, test_dsize, - test_repeat, results_evaluation) - for case in self.benchmark_imps: - if arch in case.archs: - lines += case.save2markdown(arch) - else: - continue + lines += md_table_header(self.suite_name, self.arch, SuiteInfo.dsize, + SuiteInfo.repeat, SuiteInfo.evaluator) + for case in self.cases_impl: + lines += case.to_markdown() lines.append('') return lines diff --git a/benchmarks/misc/membound_cases.py b/benchmarks/misc/membound_cases.py index b02b76382f008..1a44a671d97cf 100644 --- a/benchmarks/misc/membound_cases.py +++ b/benchmarks/misc/membound_cases.py @@ -1,4 +1,4 @@ -from utils import dtype_size, scale_repeat, size2str +from utils import dtype_size, repeat_times import taichi as ti @@ -25,7 +25,7 @@ def membound_benchmark(func, num_elements, repeat): def fill(arch, dtype, dsize, repeat=10): - repeat = scale_repeat(arch, dsize, repeat) + repeat = repeat_times(arch, dsize, repeat) num_elements = dsize // dtype_size[dtype] x = ti.field(dtype, shape=num_elements) @@ -40,7 +40,7 @@ def fill_const(n: ti.i32): def saxpy(arch, dtype, dsize, repeat=10): - repeat = scale_repeat(arch, dsize, repeat) + repeat = repeat_times(arch, dsize, repeat) num_elements = dsize // dtype_size[dtype] // 3 #z=x+y x = ti.field(dtype, shape=num_elements) @@ -60,7 +60,7 @@ def saxpy(n: ti.i32): def reduction(arch, dtype, dsize, repeat=10): - repeat = scale_repeat(arch, dsize, repeat) + repeat = repeat_times(arch, dsize, repeat) num_elements = dsize // dtype_size[dtype] x = ti.field(dtype, shape=num_elements) diff --git a/benchmarks/misc/run.py b/benchmarks/misc/run.py index 06406c9fd41fa..f7eaba5339b9e 100644 --- a/benchmarks/misc/run.py +++ b/benchmarks/misc/run.py @@ -1,51 +1,75 @@ -import datetime import os -import sys -from membound import Membound +from membound import MemoryBound from taichi.core import ti_core as _ti_core +from utils import arch_name, datatime_with_format import taichi as ti -test_suites = [Membound] -test_archs = [ti.cuda] +benchmark_suites = [MemoryBound] +benchmark_archs = [ti.cpu, ti.cuda] + + +class BenchmarkInfo: + def __init__(self, pull_request_id, commit_hash): + self.pull_request_id = pull_request_id #int + self.commit_hash = commit_hash #str + self.archs = [] #list ['x64','CUDA','Vulkan', ...] + self.datetime = [] #list [begin, end] class PerformanceMonitoring: - suites = [] + def __init__(self, arch): + self.suites = [] + self.arch = arch + for suite in benchmark_suites: + if self.check_supported(arch, suite): + self.suites.append(suite(arch)) - def __init__(self): - for s in test_suites: - self.suites.append(s()) + def check_supported(self, arch, suite): + if arch in suite.supported_archs: + return True + else: + RuntimeWarning( + 'arch[' + arch_name(arch) + + '] does not exist in SuiteInfo.supported_archs of class ' + + suite.__name__) + return False def run(self): - print("Running...") + print(f'Arch : {arch_name(self.arch)} Running...') + for suite in self.suites: + suite.run() + + def save_to_markdown(self, arch_dir='./'): + current_time = datatime_with_format() + commit_hash = _ti_core.get_commit_hash() #[:8] for s in self.suites: - s.run() + file_name = f'{s.suite_name}.md' + path = os.path.join(arch_dir, file_name) + with open(path, 'w') as f: + lines = [ + f'commit_hash: {commit_hash}\n', + f'datatime: {current_time}\n' + ] + s.get_markdown_str() + for line in lines: + print(line, file=f) - def store_to_path(self, path_with_file_name='./performance_result.md'): - with open(path_with_file_name, 'w') as f: - for arch in test_archs: - for s in self.suites: - lines = s.mdlines(arch) - for line in lines: - print(line, file=f) - def store_with_date_and_commit_id(self, file_dir='./'): - current_time = datetime.datetime.now().strftime("%Y%m%dd%Hh%Mm%Ss") - commit_hash = _ti_core.get_commit_hash()[:8] - file_name = f'perfresult_{current_time}_{commit_hash}.md' - path = os.path.join(file_dir, file_name) - print('Storing benchmark result to: ' + path) - self.store_to_path(path) +def main(): + benchmark_dir = os.path.join(os.getcwd(), 'results') + os.makedirs(benchmark_dir) -def main(): - file_dir = sys.argv[1] if len(sys.argv) > 1 else './' - p = PerformanceMonitoring() - p.run() - p.store_to_path() # for /benchmark - p.store_with_date_and_commit_id(file_dir) #for postsubmit + for arch in benchmark_archs: + #make dir + arch_dir = os.path.join(benchmark_dir, arch_name(arch)) + os.makedirs(arch_dir) + #init & run + impl = PerformanceMonitoring(arch) + impl.run() + #save result + impl.save_to_markdown(arch_dir) if __name__ == '__main__': diff --git a/benchmarks/misc/utils.py b/benchmarks/misc/utils.py index 5e6206116b34d..c3ff778e27890 100644 --- a/benchmarks/misc/utils.py +++ b/benchmarks/misc/utils.py @@ -1,3 +1,7 @@ +import datetime + +from taichi.core import ti_core as _ti_core + import taichi as ti kibibyte = 1024 @@ -13,21 +17,20 @@ (1073741824.0, 'GB'), (float('inf'), 'INF')] #B KB MB GB +def arch_name(arch): + return _ti_core.arch_name(arch) + + +def datatime_with_format(): + return datetime.datetime.now().isoformat() + + def size2str(size_in_byte): for dsize, units in reversed(size_subsection): if size_in_byte >= dsize: return str(round(size_in_byte / dsize, 4)) + units -def scale_repeat(arch, datasize, repeat=10): - scaled = repeat - if (arch == ti.gpu) | (arch == ti.opengl) | (arch == ti.cuda): - scaled *= 10 - if datasize <= 4 * 1024 * 1024: - scaled *= 10 - return scaled - - def geometric_mean(data_array): product = 1 for data in data_array: @@ -35,6 +38,14 @@ def geometric_mean(data_array): return pow(product, 1.0 / len(data_array)) +def repeat_times(arch, datasize, repeat=1): + if (arch == ti.gpu) | (arch == ti.opengl) | (arch == ti.cuda): + repeat *= 10 + if datasize <= 4 * 1024 * 1024: + repeat *= 10 + return repeat + + def md_table_header(suite_name, arch, test_dsize, test_repeat, results_evaluation): header = '|' + suite_name + '.' + ti.core.arch_name(arch) + '|' @@ -51,7 +62,7 @@ def md_table_header(suite_name, arch, test_dsize, test_repeat, repeat = '|**repeat**|' repeat += ''.join( - str(scale_repeat(arch, size, test_repeat)) + '|' + str(repeat_times(arch, size, test_repeat)) + '|' for size in test_dsize) repeat += ''.join('|' for i in range(len(results_evaluation))) From be2d045050e5db376897b4ab82720dc05027ce66 Mon Sep 17 00:00:00 2001 From: yolo2themoon Date: Tue, 26 Oct 2021 18:34:11 +0800 Subject: [PATCH 2/2] apply suggestions from code review --- benchmarks/misc/membound.py | 126 +++++++++++++----------------- benchmarks/misc/membound_cases.py | 11 ++- benchmarks/misc/run.py | 34 ++++---- benchmarks/misc/utils.py | 8 +- 4 files changed, 83 insertions(+), 96 deletions(-) diff --git a/benchmarks/misc/membound.py b/benchmarks/misc/membound.py index d408e69f6aca9..6f1952857c4a5 100644 --- a/benchmarks/misc/membound.py +++ b/benchmarks/misc/membound.py @@ -1,94 +1,78 @@ import time -from membound_cases import fill, reduction, saxpy +from membound_cases import memory_bound_cases_list from utils import (arch_name, dtype2str, geometric_mean, kibibyte, md_table_header, size2str) import taichi as ti -class SuiteInfo: - cases = [fill, saxpy, reduction] - supported_archs = [ti.cpu, ti.cuda] - dtype = [ti.i32, ti.i64, ti.f32, ti.f64] - dsize = [(4**i) * kibibyte for i in range(1, 10)] #[4KB,16KB...256MB] - repeat = 10 - evaluator = [geometric_mean] - - -class CaseResult: - def __init__(self, name, arch, dtype, dsize, evaluator): - self.test_name = name - self.test_arch = arch - self.data_type = dtype - self.data_size = dsize #list - self.min_time_in_us = [] #list - self.evaluator = evaluator - - def result_to_markdown(self): - string = '|' + self.test_name + '.' + dtype2str[self.data_type] + '|' - string += ''.join( - str(round(time, 4)) + '|' for time in self.min_time_in_us) - string += ''.join( - str(round(item(self.min_time_in_us), 4)) + '|' - for item in self.evaluator) - return string - - -class CaseImpl: - def __init__(self, func, arch, data_type, data_size): - self.func = func - self.name = func.__name__ - self.env = None - self.device = None - self.arch = arch - self.data_type = data_type - self.data_size = data_size - self.case_results = [] - - def run(self): - for dtype in self.data_type: - ti.init(kernel_profiler=True, arch=self.arch) - print("TestCase[%s.%s.%s]" % - (self.func.__name__, arch_name(self.arch), dtype2str[dtype])) - result = CaseResult(self.name, self.arch, dtype, self.data_size, - SuiteInfo.evaluator) - for size in self.data_size: - print("data_size = %s" % (size2str(size))) - result.min_time_in_us.append( - self.func(self.arch, dtype, size, SuiteInfo.repeat)) - time.sleep(0.2) - self.case_results.append(result) - - def to_markdown(self): - header = '|kernel elapsed time(ms)' + ''.join( - '|' for i in range(len(self.data_size) + len(SuiteInfo.evaluator))) - lines = [header] - for result in self.case_results: - lines.append(result.result_to_markdown()) - return lines - - class MemoryBound: suite_name = 'memorybound' - supported_archs = SuiteInfo.supported_archs + supported_archs = [ti.cpu, ti.cuda] + test_cases = memory_bound_cases_list + test_dtype_list = [ti.i32, ti.i64, ti.f32, ti.f64] + test_dsize_list = [(4**i) * kibibyte + for i in range(1, 10)] #[4KB,16KB...256MB] + basic_repeat_times = 10 + evaluator = [geometric_mean] def __init__(self, arch): self.arch = arch self.cases_impl = [] - for case in SuiteInfo.cases: - self.cases_impl.append( - CaseImpl(case, arch, SuiteInfo.dtype, SuiteInfo.dsize)) + for case in self.test_cases: + for dtype in self.test_dtype_list: + impl = CaseImpl(case, arch, dtype, self.test_dsize_list, + self.evaluator) + self.cases_impl.append(impl) def run(self): for case in self.cases_impl: case.run() - def get_markdown_str(self): + def get_markdown_lines(self): lines = [] - lines += md_table_header(self.suite_name, self.arch, SuiteInfo.dsize, - SuiteInfo.repeat, SuiteInfo.evaluator) + lines += md_table_header(self.suite_name, self.arch, + self.test_dsize_list, self.basic_repeat_times, + self.evaluator) + + result_header = '|kernel elapsed time(ms)' + ''.join( + '|' for i in range( + len(self.test_dsize_list) + len(MemoryBound.evaluator))) + lines += [result_header] for case in self.cases_impl: - lines += case.to_markdown() + lines += case.get_markdown_lines() lines.append('') return lines + + +class CaseImpl: + def __init__(self, func, arch, test_dtype, test_dsize_list, evaluator): + self.func = func + self.name = func.__name__ + self.arch = arch + self.test_dtype = test_dtype + self.test_dsize_list = test_dsize_list + self.min_time_in_us = [] #test results + self.evaluator = evaluator + + def run(self): + ti.init(kernel_profiler=True, arch=self.arch) + print("TestCase[%s.%s.%s]" % (self.func.__name__, arch_name( + self.arch), dtype2str[self.test_dtype])) + for test_dsize in self.test_dsize_list: + print("test_dsize = %s" % (size2str(test_dsize))) + self.min_time_in_us.append( + self.func(self.arch, self.test_dtype, test_dsize, + MemoryBound.basic_repeat_times)) + time.sleep(0.2) + ti.reset() + + def get_markdown_lines(self): + string = '|' + self.name + '.' + dtype2str[self.test_dtype] + '|' + string += ''.join( + str(round(time, 4)) + '|' for time in self.min_time_in_us) + string += ''.join( + str(round(item(self.min_time_in_us), 4)) + '|' + for item in self.evaluator) + return [string] diff --git a/benchmarks/misc/membound_cases.py b/benchmarks/misc/membound_cases.py index 1a44a671d97cf..5792a85a2ce63 100644 --- a/benchmarks/misc/membound_cases.py +++ b/benchmarks/misc/membound_cases.py @@ -1,4 +1,4 @@ -from utils import dtype_size, repeat_times +from utils import dtype_size, scaled_repeat_times import taichi as ti @@ -25,7 +25,7 @@ def membound_benchmark(func, num_elements, repeat): def fill(arch, dtype, dsize, repeat=10): - repeat = repeat_times(arch, dsize, repeat) + repeat = scaled_repeat_times(arch, dsize, repeat) num_elements = dsize // dtype_size[dtype] x = ti.field(dtype, shape=num_elements) @@ -40,7 +40,7 @@ def fill_const(n: ti.i32): def saxpy(arch, dtype, dsize, repeat=10): - repeat = repeat_times(arch, dsize, repeat) + repeat = scaled_repeat_times(arch, dsize, repeat) num_elements = dsize // dtype_size[dtype] // 3 #z=x+y x = ti.field(dtype, shape=num_elements) @@ -60,7 +60,7 @@ def saxpy(n: ti.i32): def reduction(arch, dtype, dsize, repeat=10): - repeat = repeat_times(arch, dsize, repeat) + repeat = scaled_repeat_times(arch, dsize, repeat) num_elements = dsize // dtype_size[dtype] x = ti.field(dtype, shape=num_elements) @@ -74,3 +74,6 @@ def reduction(n: ti.i32): init_const(x, dtype, num_elements) return membound_benchmark(reduction, num_elements, repeat) + + +memory_bound_cases_list = [fill, saxpy, reduction] diff --git a/benchmarks/misc/run.py b/benchmarks/misc/run.py index f7eaba5339b9e..4922e043d14bd 100644 --- a/benchmarks/misc/run.py +++ b/benchmarks/misc/run.py @@ -1,4 +1,5 @@ import os +import warnings from membound import MemoryBound from taichi.core import ti_core as _ti_core @@ -10,15 +11,15 @@ benchmark_archs = [ti.cpu, ti.cuda] -class BenchmarkInfo: +class CommitInfo: def __init__(self, pull_request_id, commit_hash): - self.pull_request_id = pull_request_id #int + self.pull_request_id = pull_request_id self.commit_hash = commit_hash #str - self.archs = [] #list ['x64','CUDA','Vulkan', ...] - self.datetime = [] #list [begin, end] + self.archs = [] #['x64','cuda','vulkan', ...] + self.datetime = [] #[start, end] -class PerformanceMonitoring: +class BenchmarkSuites: def __init__(self, arch): self.suites = [] self.arch = arch @@ -30,28 +31,29 @@ def check_supported(self, arch, suite): if arch in suite.supported_archs: return True else: - RuntimeWarning( - 'arch[' + arch_name(arch) + - '] does not exist in SuiteInfo.supported_archs of class ' + - suite.__name__) + warnings.warn( + f'Arch [{arch_name(arch)}] does not exist in {suite.__name__}.supported_archs.', + UserWarning, + stacklevel=2) return False def run(self): - print(f'Arch : {arch_name(self.arch)} Running...') + print(f'Arch [{arch_name(self.arch)}] Running...') for suite in self.suites: suite.run() def save_to_markdown(self, arch_dir='./'): current_time = datatime_with_format() commit_hash = _ti_core.get_commit_hash() #[:8] - for s in self.suites: - file_name = f'{s.suite_name}.md' + for suite in self.suites: + file_name = f'{suite.suite_name}.md' path = os.path.join(arch_dir, file_name) with open(path, 'w') as f: lines = [ f'commit_hash: {commit_hash}\n', f'datatime: {current_time}\n' - ] + s.get_markdown_str() + ] + lines += suite.get_markdown_lines() for line in lines: print(line, file=f) @@ -66,10 +68,10 @@ def main(): arch_dir = os.path.join(benchmark_dir, arch_name(arch)) os.makedirs(arch_dir) #init & run - impl = PerformanceMonitoring(arch) - impl.run() + suites = BenchmarkSuites(arch) + suites.run() #save result - impl.save_to_markdown(arch_dir) + suites.save_to_markdown(arch_dir) if __name__ == '__main__': diff --git a/benchmarks/misc/utils.py b/benchmarks/misc/utils.py index c3ff778e27890..3548151ec6fce 100644 --- a/benchmarks/misc/utils.py +++ b/benchmarks/misc/utils.py @@ -1,7 +1,5 @@ import datetime -from taichi.core import ti_core as _ti_core - import taichi as ti kibibyte = 1024 @@ -18,7 +16,7 @@ def arch_name(arch): - return _ti_core.arch_name(arch) + return str(arch).replace('Arch.', '') def datatime_with_format(): @@ -38,7 +36,7 @@ def geometric_mean(data_array): return pow(product, 1.0 / len(data_array)) -def repeat_times(arch, datasize, repeat=1): +def scaled_repeat_times(arch, datasize, repeat=1): if (arch == ti.gpu) | (arch == ti.opengl) | (arch == ti.cuda): repeat *= 10 if datasize <= 4 * 1024 * 1024: @@ -62,7 +60,7 @@ def md_table_header(suite_name, arch, test_dsize, test_repeat, repeat = '|**repeat**|' repeat += ''.join( - str(repeat_times(arch, size, test_repeat)) + '|' + str(scaled_repeat_times(arch, size, test_repeat)) + '|' for size in test_dsize) repeat += ''.join('|' for i in range(len(results_evaluation)))