157 changes: 64 additions & 93 deletions benchmarks/misc/membound.py
@@ -1,107 +1,78 @@
import time

from membound_cases import fill, reduction, saxpy
from utils import *
from membound_cases import memory_bound_cases_list
from utils import (arch_name, dtype2str, geometric_mean, kibibyte,
md_table_header, size2str)

import taichi as ti

test_cases = [fill, saxpy, reduction]
test_archs = [ti.cuda]
test_dtype = [ti.i32, ti.i64, ti.f32, ti.f64]
test_dsize = [(4**i) * kibibyte for i in range(1, 10)] #[4KB,16KB...256MB]
test_repeat = 10
results_evaluation = [geometric_mean]


class BenchmarkResult:
def __init__(self, name, arch, dtype, dsize, results_evaluation):
self.test_name = name
self.test_arch = arch
self.data_type = dtype
self.data_size = dsize
self.min_time_in_us = []
self.results_evaluation = results_evaluation

def time2mdtableline(self):
string = '|' + self.test_name + '.' + dtype2str[self.data_type] + '|'
string += ''.join(
str(round(time, 4)) + '|' for time in self.min_time_in_us)
string += ''.join(
str(round(item(self.min_time_in_us), 4)) + '|'
for item in self.results_evaluation)
return string


class BenchmarkImpl:
def __init__(self, func, archs, data_type, data_size):
self.func = func
self.name = func.__name__
self.env = None
self.device = None
self.archs = archs
self.data_type = data_type
self.data_size = data_size
self.benchmark_results = []
class MemoryBound:
suite_name = 'memorybound'
supported_archs = [ti.cpu, ti.cuda]
test_cases = memory_bound_cases_list
test_dtype_list = [ti.i32, ti.i64, ti.f32, ti.f64]
test_dsize_list = [(4**i) * kibibyte
for i in range(1, 10)] #[4KB,16KB...256MB]
basic_repeat_times = 10
evaluator = [geometric_mean]

def __init__(self, arch):
self.arch = arch
self.cases_impl = []
for case in self.test_cases:
for dtype in self.test_dtype_list:
impl = CaseImpl(case, arch, dtype, self.test_dsize_list,
self.evaluator)
self.cases_impl.append(impl)

def run(self):
for arch in self.archs:
for dtype in self.data_type:
ti.init(kernel_profiler=True, arch=arch)
print("TestCase[%s.%s.%s]" %
(self.func.__name__, ti.core.arch_name(arch),
dtype2str[dtype]))
result = BenchmarkResult(self.name, arch, dtype,
self.data_size, results_evaluation)
for size in self.data_size:
print("data_size = %s" % (size2str(size)))
result.min_time_in_us.append(
self.func(arch, dtype, size, test_repeat))
time.sleep(0.2)
self.benchmark_results.append(result)

def print(self):
i = 0
for arch in self.archs:
for dtype in self.data_type:
for idx in range(len(self.data_size)):
print(
" test_case:[%s] arch:[%s] dtype:[%s] dsize:[%7s] >>> time:[%4.4f]"
%
(self.name, ti.core.arch_name(arch), dtype2str[dtype],
size2str(self.benchmark_results[i].data_size[idx]),
self.benchmark_results[i].min_time_in_us[idx]))
i = i + 1
for case in self.cases_impl:
case.run()

def save2markdown(self, arch):
header = '|kernel elapsed time(ms)' + ''.join(
'|' for i in range(len(self.data_size) + len(results_evaluation)))
lines = [header]
for result in self.benchmark_results:
if (result.test_arch == arch):
lines.append(result.time2mdtableline())
def get_markdown_lines(self):
lines = []
lines += md_table_header(self.suite_name, self.arch,
self.test_dsize_list, self.basic_repeat_times,
self.evaluator)

result_header = '|kernel elapsed time(ms)' + ''.join(
'|' for i in range(
len(self.test_dsize_list) + len(MemoryBound.evaluator)))
lines += [result_header]
for case in self.cases_impl:
lines += case.get_markdown_lines()
lines.append('')
return lines


class Membound:
benchmark_imps = []

def __init__(self):
for case in test_cases:
self.benchmark_imps.append(
BenchmarkImpl(case, test_archs, test_dtype, test_dsize))
class CaseImpl:
def __init__(self, func, arch, test_dtype, test_dsize_list, evaluator):
self.func = func
self.name = func.__name__
self.arch = arch
self.test_dtype = test_dtype
self.test_dsize_list = test_dsize_list
self.min_time_in_us = [] #test results
self.evaluator = evaluator

def run(self):
for case in self.benchmark_imps:
case.run()

def mdlines(self, arch):
lines = []
lines += md_table_header(self.__class__.__name__, arch, test_dsize,
test_repeat, results_evaluation)
for case in self.benchmark_imps:
if arch in case.archs:
lines += case.save2markdown(arch)
else:
continue
lines.append('')
return lines
ti.init(kernel_profiler=True, arch=self.arch)
print("TestCase[%s.%s.%s]" % (self.func.__name__, arch_name(
self.arch), dtype2str[self.test_dtype]))
for test_dsize in self.test_dsize_list:
print("test_dsize = %s" % (size2str(test_dsize)))
self.min_time_in_us.append(
self.func(self.arch, self.test_dtype, test_dsize,
MemoryBound.basic_repeat_times))
time.sleep(0.2)
Review thread on the time.sleep(0.2) call above:

Contributor: Just for my own understanding, why do we need to sleep 0.2 s here?

Contributor Author (@yolo2themoon, Oct 26, 2021): The idea was to give the device a cooling time, but the 0.2 s here is quite arbitrary. Theoretically, we just need to make sure that this condition is consistent for each benchmark test. Perhaps we can remove the sleep to avoid performance fluctuations caused by subsequent changes.

Contributor: Ah, OK, good to know. Thanks. The time is probably HW- and workload-dependent, hard to quantify.

ti.reset()

def get_markdown_lines(self):
string = '|' + self.name + '.' + dtype2str[self.test_dtype] + '|'
string += ''.join(
str(round(time, 4)) + '|' for time in self.min_time_in_us)
string += ''.join(
str(round(item(self.min_time_in_us), 4)) + '|'
for item in self.evaluator)
return [string]
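Aside (not part of the diff): a minimal trace of the row format that CaseImpl.get_markdown_lines builds, assuming a hypothetical fill.i32 case with two measured sizes and a stand-in mean evaluator (the suite's real evaluator is geometric_mean):

# Hypothetical trace of the string-building logic above; the timings
# and the mean evaluator are stand-ins, not measured results.
min_time_in_us = [12.3456, 98.7654]
evaluator = [lambda times: sum(times) / len(times)]

string = '|' + 'fill' + '.' + 'i32' + '|'
string += ''.join(str(round(t, 4)) + '|' for t in min_time_in_us)
string += ''.join(str(round(f(min_time_in_us), 4)) + '|' for f in evaluator)
print(string)  # -> |fill.i32|12.3456|98.7654|55.5555|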
11 changes: 7 additions & 4 deletions benchmarks/misc/membound_cases.py
@@ -1,4 +1,4 @@
from utils import dtype_size, scale_repeat, size2str
from utils import dtype_size, scaled_repeat_times

import taichi as ti

@@ -25,7 +25,7 @@ def membound_benchmark(func, num_elements, repeat):

def fill(arch, dtype, dsize, repeat=10):

repeat = scale_repeat(arch, dsize, repeat)
repeat = scaled_repeat_times(arch, dsize, repeat)
num_elements = dsize // dtype_size[dtype]

x = ti.field(dtype, shape=num_elements)
@@ -40,7 +40,7 @@ def fill_const(n: ti.i32):

def saxpy(arch, dtype, dsize, repeat=10):

repeat = scale_repeat(arch, dsize, repeat)
repeat = scaled_repeat_times(arch, dsize, repeat)
num_elements = dsize // dtype_size[dtype] // 3 #z=x+y

x = ti.field(dtype, shape=num_elements)
@@ -60,7 +60,7 @@ def saxpy(n: ti.i32):

def reduction(arch, dtype, dsize, repeat=10):

repeat = scale_repeat(arch, dsize, repeat)
repeat = scaled_repeat_times(arch, dsize, repeat)
num_elements = dsize // dtype_size[dtype]

x = ti.field(dtype, shape=num_elements)
@@ -74,3 +74,6 @@ def reduction(n: ti.i32):

init_const(x, dtype, num_elements)
return membound_benchmark(reduction, num_elements, repeat)


memory_bound_cases_list = [fill, saxpy, reduction]
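Aside (not part of the diff): a quick worked check of the sizing arithmetic these cases share. saxpy splits its byte budget across three fields (z = x + y), and the 4-byte f32 element size matches dtype_size[ti.f32]:

# Worked example: 4 KiB of f32 data for saxpy.
dtype_size_f32 = 4                           # bytes per ti.f32 element
dsize = 4 * 1024                             # smallest sweep size: 4 KiB
num_elements = dsize // dtype_size_f32 // 3  # three fields share the budget
print(num_elements)                          # -> 341 elements per field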
94 changes: 60 additions & 34 deletions benchmarks/misc/run.py
@@ -1,51 +1,77 @@
import datetime
import os
import sys
import warnings

from membound import Membound
from membound import MemoryBound
from taichi.core import ti_core as _ti_core
from utils import arch_name, datatime_with_format

import taichi as ti

test_suites = [Membound]
test_archs = [ti.cuda]
benchmark_suites = [MemoryBound]
benchmark_archs = [ti.cpu, ti.cuda]


class PerformanceMonitoring:
suites = []
class CommitInfo:
def __init__(self, pull_request_id, commit_hash):
self.pull_request_id = pull_request_id
self.commit_hash = commit_hash #str
self.archs = [] #['x64','cuda','vulkan', ...]
self.datetime = [] #[start, end]

def __init__(self):
for s in test_suites:
self.suites.append(s())

class BenchmarkSuites:
def __init__(self, arch):
self.suites = []
self.arch = arch
for suite in benchmark_suites:
if self.check_supported(arch, suite):
self.suites.append(suite(arch))

def check_supported(self, arch, suite):
if arch in suite.supported_archs:
return True
else:
warnings.warn(
f'Arch [{arch_name(arch)}] does not exist in {suite.__name__}.supported_archs.',
UserWarning,
stacklevel=2)
return False

def run(self):
print("Running...")
for s in self.suites:
s.run()

def store_to_path(self, path_with_file_name='./performance_result.md'):
with open(path_with_file_name, 'w') as f:
for arch in test_archs:
for s in self.suites:
lines = s.mdlines(arch)
for line in lines:
print(line, file=f)

def store_with_date_and_commit_id(self, file_dir='./'):
current_time = datetime.datetime.now().strftime("%Y%m%dd%Hh%Mm%Ss")
commit_hash = _ti_core.get_commit_hash()[:8]
file_name = f'perfresult_{current_time}_{commit_hash}.md'
path = os.path.join(file_dir, file_name)
print('Storing benchmark result to: ' + path)
self.store_to_path(path)
print(f'Arch [{arch_name(self.arch)}] Running...')
for suite in self.suites:
suite.run()

def save_to_markdown(self, arch_dir='./'):
current_time = datatime_with_format()
commit_hash = _ti_core.get_commit_hash() #[:8]
for suite in self.suites:
file_name = f'{suite.suite_name}.md'
path = os.path.join(arch_dir, file_name)
with open(path, 'w') as f:
lines = [
f'commit_hash: {commit_hash}\n',
f'datatime: {current_time}\n'
]
lines += suite.get_markdown_lines()
for line in lines:
print(line, file=f)


def main():
file_dir = sys.argv[1] if len(sys.argv) > 1 else './'
p = PerformanceMonitoring()
p.run()
p.store_to_path() # for /benchmark
p.store_with_date_and_commit_id(file_dir) #for postsubmit

benchmark_dir = os.path.join(os.getcwd(), 'results')
os.makedirs(benchmark_dir)

for arch in benchmark_archs:
#make dir
arch_dir = os.path.join(benchmark_dir, arch_name(arch))
os.makedirs(arch_dir)
#init & run
suites = BenchmarkSuites(arch)
suites.run()
#save result
suites.save_to_markdown(arch_dir)


if __name__ == '__main__':
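Aside (not part of the diff): with the two default archs, the new main() writes one markdown file per suite into a per-arch directory. A sketch of the resulting paths, assuming arch_name renders ti.cpu as 'x64' (as the CommitInfo comment above suggests):

import os

# Layout sketch only; no benchmarks are run here.
benchmark_dir = os.path.join(os.getcwd(), 'results')
for arch in ('x64', 'cuda'):
    print(os.path.join(benchmark_dir, arch, 'memorybound.md'))
# -> <cwd>/results/x64/memorybound.md
# -> <cwd>/results/cuda/memorybound.md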
29 changes: 19 additions & 10 deletions benchmarks/misc/utils.py
@@ -1,3 +1,5 @@
import datetime

import taichi as ti

kibibyte = 1024
@@ -13,28 +15,35 @@
(1073741824.0, 'GB'), (float('inf'), 'INF')] #B KB MB GB


def arch_name(arch):
return str(arch).replace('Arch.', '')


def datatime_with_format():
return datetime.datetime.now().isoformat()


def size2str(size_in_byte):
for dsize, units in reversed(size_subsection):
if size_in_byte >= dsize:
return str(round(size_in_byte / dsize, 4)) + units


def scale_repeat(arch, datasize, repeat=10):
scaled = repeat
if (arch == ti.gpu) | (arch == ti.opengl) | (arch == ti.cuda):
scaled *= 10
if datasize <= 4 * 1024 * 1024:
scaled *= 10
return scaled


def geometric_mean(data_array):
product = 1
for data in data_array:
product *= data
return pow(product, 1.0 / len(data_array))
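Aside (not part of the diff): a self-contained check of geometric_mean as defined above; the geometric mean is a fair summary across timings that span several orders of magnitude:

# Copy of geometric_mean above, checked on a small example.
def geometric_mean(data_array):
    product = 1
    for data in data_array:
        product *= data
    return pow(product, 1.0 / len(data_array))

print(geometric_mean([1.0, 4.0, 16.0]))  # -> 4.0, the cube root of 64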


def scaled_repeat_times(arch, datasize, repeat=1):
if (arch == ti.gpu) | (arch == ti.opengl) | (arch == ti.cuda):
repeat *= 10
if datasize <= 4 * 1024 * 1024:
repeat *= 10
return repeat
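Aside (not part of the diff): a worked example of the two scaling branches, mirroring the definition above; the arch handles are Taichi's own:

import taichi as ti

def scaled_repeat_times(arch, datasize, repeat=1):  # mirror of the code above
    if (arch == ti.gpu) | (arch == ti.opengl) | (arch == ti.cuda):
        repeat *= 10  # GPU backends run 10x more iterations
    if datasize <= 4 * 1024 * 1024:
        repeat *= 10  # buffers of 4 MiB or less also get 10x
    return repeat

print(scaled_repeat_times(ti.cuda, 2 * 1024 * 1024))   # -> 100 (both branches)
print(scaled_repeat_times(ti.cuda, 64 * 1024 * 1024))  # -> 10  (GPU branch only)
print(scaled_repeat_times(ti.cpu, 64 * 1024 * 1024))   # -> 1   (no scaling)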


def md_table_header(suite_name, arch, test_dsize, test_repeat,
results_evaluation):
header = '|' + suite_name + '.' + ti.core.arch_name(arch) + '|'
@@ -51,7 +60,7 @@ def md_table_header(suite_name, arch, test_dsize,

repeat = '|**repeat**|'
repeat += ''.join(
str(scale_repeat(arch, size, test_repeat)) + '|'
str(scaled_repeat_times(arch, size, test_repeat)) + '|'
for size in test_dsize)
repeat += ''.join('|' for i in range(len(results_evaluation)))

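Aside (not part of the diff): rendering the '|**repeat**|' row for CUDA with the suite's basic repeat count of 10 makes the scaling visible; the size sweep is assumed to be the suite's 4 KB to 256 MB list:

# Hypothetical rendering of the repeat row for the CUDA arch.
kibibyte = 1024
test_dsize = [(4**i) * kibibyte for i in range(1, 10)]  # 4 KB ... 256 MB

def cuda_repeat(size, repeat=10):  # CUDA path of scaled_repeat_times
    repeat *= 10
    if size <= 4 * 1024 * 1024:
        repeat *= 10
    return repeat

row = '|**repeat**|' + ''.join(str(cuda_repeat(s)) + '|' for s in test_dsize)
print(row)  # -> |**repeat**|1000|1000|1000|1000|1000|1000|100|100|100|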