Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions mcoplib/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,16 @@ def _is_profiler_enabled() -> bool:


def _timestamp() -> str:
    """Return the current local time as a compact, microsecond-resolution string.

    The value is formatted with ``%Y%m%dT%H%M%S%f`` (for example
    ``20260610T120000000001``). The trailing microsecond field keeps names
    built from this stamp distinct when several traces are exported within
    the same second.

    Returns:
        A 21-character string: 8 date digits, ``T``, then 12 time digits.
    """
    # Only the microsecond-precision format is kept; a second-precision
    # return placed before it would make this line unreachable.
    return datetime.now().strftime("%Y%m%dT%H%M%S%f")


def _trace_file_path(output_dir, func_name, rank):
    """Build a unique, filesystem-safe path for an exported trace file.

    The file name is composed of the sanitized function name, the rank, a
    timestamp from ``_timestamp()``, the current process id and the current
    thread id, so repeated exports do not overwrite each other.

    Args:
        output_dir: Directory the trace file should be placed in.
        func_name: Name of the profiled function; may contain characters
            that are not valid in file names.
        rank: Rank identifier embedded in the file name.

    Returns:
        ``output_dir`` joined with the generated ``.json`` file name.
    """
    import string  # function-scope import keeps this block self-contained

    # Keep only ASCII letters, digits and "._-". Every kept character is a
    # single byte in UTF-8, so the 128-character cap below is also a
    # 128-byte cap. Filesystem name limits are expressed in bytes, and
    # non-ASCII alphanumerics would otherwise let the name exceed them.
    allowed = string.ascii_letters + string.digits + "._-"
    safe_name = "".join(ch if ch in allowed else "_" for ch in func_name)[:128]
    filename = (
        f"{safe_name}_trace_rank_{rank}_"
        f"{_timestamp()}_pid_{os.getpid()}_tid_{threading.get_ident()}.json"
    )
    return os.path.join(output_dir, filename)
Comment on lines +22 to +28

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

在某些文件系统(如 ext4、NTFS 等)中,文件名长度限制为 255 个字符。如果 func_name 非常长(例如在一些经过多层装饰或自动生成的函数中),生成的 trace 文件名可能会超出限制,从而导致 OSError: [Errno 36] File name too long 异常。建议对 safe_name 进行截断(例如限制在 128 个字符以内),以提高代码的健壮性。

Suggested change
def _trace_file_path(output_dir, func_name, rank):
safe_name = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in func_name)
filename = (
f"{safe_name}_trace_rank_{rank}_"
f"{_timestamp()}_pid_{os.getpid()}_tid_{threading.get_ident()}.json"
)
return os.path.join(output_dir, filename)
def _trace_file_path(output_dir, func_name, rank):
safe_name = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in func_name)[:128]
filename = (
f"{safe_name}_trace_rank_{rank}_"
f"{_timestamp()}_pid_{os.getpid()}_tid_{threading.get_ident()}.json"
)
return os.path.join(output_dir, filename)



def _track_handler(prof, output_dir, func_name):
Expand Down Expand Up @@ -48,8 +57,8 @@ def _track_handler(prof, output_dir, func_name):
# If distributed environment is not initialized, use default value 0
rank = 0

# Export trace to local directory
trace_path = os.path.join(output_dir, f"{func_name}_trace_rank_{rank}.json")
# Export trace to a unique file so repeated benchmark runs do not overwrite evidence.
trace_path = _trace_file_path(output_dir, func_name, rank)
prof.export_chrome_trace(trace_path)
print(f"Chrome trace exported to: {trace_path}")

Expand Down
44 changes: 44 additions & 0 deletions unit_test/test_profiler_trace_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import sys
import tempfile
import unittest
from pathlib import Path
Comment on lines +1 to +5

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

为了在测试中使用 unittest.mock.patch 来模拟时间戳,我们需要导入 patch。

Suggested change
import os
import sys
import tempfile
import unittest
from pathlib import Path
import os
import sys
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch

from unittest.mock import patch

sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from mcoplib.profiler import _trace_file_path


class ProfilerTracePathTest(unittest.TestCase):
@patch("mcoplib.profiler._timestamp")
def test_trace_file_path_is_unique(self, mock_timestamp):
    # Pin two different timestamps so consecutive calls cannot collide
    # inside the same microsecond and make the test flaky.
    mock_timestamp.side_effect = ["20260610T120000000001", "20260610T120000000002"]
    with tempfile.TemporaryDirectory() as tmp_path:
        first_path = _trace_file_path(tmp_path, "fused_mla", 0)
        second_path = _trace_file_path(tmp_path, "fused_mla", 0)

    # Same arguments, different stamp: the paths must differ.
    self.assertNotEqual(first_path, second_path)
    self.assertTrue(first_path.endswith(".json"))
    # The file must land directly inside the requested directory.
    parent_dir = os.path.dirname(first_path)
    self.assertEqual(parent_dir, tmp_path)
Comment on lines +13 to +23

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

在运行速度非常快的测试环境中,连续两次调用 _trace_file_path 可能会在同一微秒内完成。由于 _timestamp() 的精度是微秒级,且 pid 和 tid 相同,这会导致 first 和 second 的文件名完全一致,从而使 self.assertNotEqual(first, second) 断言失败,造成测试不稳定(flaky test)。建议使用 unittest.mock.patch 模拟 _timestamp 返回不同的值,以确保测试的确定性和稳定性。

Suggested change
class ProfilerTracePathTest(unittest.TestCase):
def test_trace_file_path_is_unique(self):
with tempfile.TemporaryDirectory() as tmp_path:
first = _trace_file_path(tmp_path, "fused_mla", 0)
second = _trace_file_path(tmp_path, "fused_mla", 0)
self.assertNotEqual(first, second)
self.assertTrue(first.endswith(".json"))
self.assertEqual(os.path.dirname(first), tmp_path)
class ProfilerTracePathTest(unittest.TestCase):
@patch("mcoplib.profiler._timestamp")
def test_trace_file_path_is_unique(self, mock_timestamp):
mock_timestamp.side_effect = ["20231011T120000000001", "20231011T120000000002"]
with tempfile.TemporaryDirectory() as tmp_path:
first = _trace_file_path(tmp_path, "fused_mla", 0)
second = _trace_file_path(tmp_path, "fused_mla", 0)
self.assertNotEqual(first, second)
self.assertTrue(first.endswith(".json"))
self.assertEqual(os.path.dirname(first), tmp_path)


def test_trace_file_path_sanitizes_function_name(self):
    # A name containing a path separator and spaces must come back with
    # those characters replaced by underscores.
    with tempfile.TemporaryDirectory() as tmp_path:
        trace_path = _trace_file_path(tmp_path, "op/name with spaces", 1)

    base_name = os.path.basename(trace_path)
    expected_prefix = "op_name_with_spaces_trace_rank_1_"
    self.assertTrue(base_name.startswith(expected_prefix))
    for forbidden in ("/", " "):
        self.assertNotIn(forbidden, base_name)

def test_trace_file_path_truncates_long_function_name(self):
    # A 300-character name must be cut down to the 128-character cap.
    overlong_name = "x" * 300
    with tempfile.TemporaryDirectory() as tmp_path:
        trace_path = _trace_file_path(tmp_path, overlong_name, 0)

    # Everything before the first "_trace_rank_" marker is the sanitized name.
    sanitized_part = os.path.basename(trace_path).partition("_trace_rank_")[0]
    self.assertEqual(len(sanitized_part), 128)


if __name__ == "__main__":
unittest.main()