生成唯一 MACA kernel dump 路径 #28
Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces a stable, content-addressed naming scheme for kernel dump files in the MXCC compiler integration using SHA256 hashing, along with a new unit test suite. Feedback on these changes suggests avoiding the risky mocking of sys.modules in the test file by directly importing mxcc from tvm.contrib, and defensively handling cases where the input code is bytes or None in _kernel_file_name to prevent potential runtime errors.
Important
The consumer version of Gemini Code Assist on GitHub is being sunset. Starting June 18, 2026, new organization installations will be blocked, and all code review activity will officially cease on July 17, 2026.
For more details on the timeline and next steps, please review the Help Documentation.
| import hashlib | ||
| import importlib.util | ||
| import sys | ||
| import types | ||
| import unittest | ||
| from pathlib import Path | ||
|
|
||
|
|
||
| REPO_ROOT = Path(__file__).resolve().parents[3] | ||
| MXCC_PATH = REPO_ROOT / "python" / "tvm" / "contrib" / "mxcc.py" | ||
|
|
||
| tvm_ffi = types.ModuleType("tvm_ffi") | ||
| tvm_ffi.register_global_func = lambda *args, **kwargs: ( | ||
| (lambda func: func) if args and callable(args[0]) else (lambda func: func) | ||
| ) | ||
| tvm_ffi.get_global_func = lambda _name: None | ||
| sys.modules["tvm_ffi"] = tvm_ffi | ||
|
|
||
| tvm = types.ModuleType("tvm") | ||
| tvm.maca = lambda *_args, **_kwargs: types.SimpleNamespace(exist=False) | ||
| tvm.target = types.ModuleType("tvm.target") | ||
| tvm.target.Target = object | ||
| sys.modules["tvm"] = tvm | ||
| sys.modules["tvm.target"] = tvm.target | ||
|
|
||
| tvm_base = types.ModuleType("tvm.base") | ||
| tvm_base.py_str = lambda value: value.decode("utf-8") if isinstance(value, bytes) else str(value) | ||
| sys.modules["tvm.base"] = tvm_base | ||
|
|
||
| tvm_contrib = types.ModuleType("tvm.contrib") | ||
| tvm_contrib.__path__ = [] | ||
| sys.modules["tvm.contrib"] = tvm_contrib | ||
|
|
||
| tvm_contrib_utils = types.ModuleType("tvm.contrib.utils") | ||
| tvm_contrib_utils.tempdir = lambda: None | ||
| sys.modules["tvm.contrib.utils"] = tvm_contrib_utils | ||
|
|
||
| spec = importlib.util.spec_from_file_location("tvm.contrib.mxcc", MXCC_PATH) | ||
| mxcc = importlib.util.module_from_spec(spec) | ||
| mxcc.__package__ = "tvm.contrib" | ||
| sys.modules["tvm.contrib.mxcc"] = mxcc | ||
| assert spec.loader is not None | ||
| spec.loader.exec_module(mxcc) |
There was a problem hiding this comment.
在测试文件中通过修改 sys.modules 来 mock tvm 及其相关子模块是一种非常危险的做法。如果该测试与其他测试在同一个 Python 进程中运行(例如使用 pytest 运行整个测试套件),这会全局污染 sys.modules,导致后续执行的真实 TVM 测试因获取到 mock 对象而失败。
由于在 TVM 的测试环境中,真实的 tvm 库已经是可用且配置好的,因此无需进行任何 mock。可以直接从 tvm.contrib 导入 mxcc 模块进行测试。
| import hashlib | |
| import importlib.util | |
| import sys | |
| import types | |
| import unittest | |
| from pathlib import Path | |
| REPO_ROOT = Path(__file__).resolve().parents[3] | |
| MXCC_PATH = REPO_ROOT / "python" / "tvm" / "contrib" / "mxcc.py" | |
| tvm_ffi = types.ModuleType("tvm_ffi") | |
| tvm_ffi.register_global_func = lambda *args, **kwargs: ( | |
| (lambda func: func) if args and callable(args[0]) else (lambda func: func) | |
| ) | |
| tvm_ffi.get_global_func = lambda _name: None | |
| sys.modules["tvm_ffi"] = tvm_ffi | |
| tvm = types.ModuleType("tvm") | |
| tvm.maca = lambda *_args, **_kwargs: types.SimpleNamespace(exist=False) | |
| tvm.target = types.ModuleType("tvm.target") | |
| tvm.target.Target = object | |
| sys.modules["tvm"] = tvm | |
| sys.modules["tvm.target"] = tvm.target | |
| tvm_base = types.ModuleType("tvm.base") | |
| tvm_base.py_str = lambda value: value.decode("utf-8") if isinstance(value, bytes) else str(value) | |
| sys.modules["tvm.base"] = tvm_base | |
| tvm_contrib = types.ModuleType("tvm.contrib") | |
| tvm_contrib.__path__ = [] | |
| sys.modules["tvm.contrib"] = tvm_contrib | |
| tvm_contrib_utils = types.ModuleType("tvm.contrib.utils") | |
| tvm_contrib_utils.tempdir = lambda: None | |
| sys.modules["tvm.contrib.utils"] = tvm_contrib_utils | |
| spec = importlib.util.spec_from_file_location("tvm.contrib.mxcc", MXCC_PATH) | |
| mxcc = importlib.util.module_from_spec(spec) | |
| mxcc.__package__ = "tvm.contrib" | |
| sys.modules["tvm.contrib.mxcc"] = mxcc | |
| assert spec.loader is not None | |
| spec.loader.exec_module(mxcc) | |
| import hashlib | |
| import unittest | |
| from tvm.contrib import mxcc |
| def _kernel_file_name(code): | ||
| """Return a stable kernel dump filename for the given MACA source.""" | ||
| digest = hashlib.sha256(code.encode("utf-8")).hexdigest()[:16] | ||
| return f"tvm_kernels_{digest}" |
There was a problem hiding this comment.
为了提高代码的健壮性(防御性编程),建议支持 code 为 bytes 或 None 的情况。如果 code 已经是 bytes 类型,直接调用 .encode("utf-8") 会抛出 AttributeError(Python 3 的 bytes 对象没有 encode 方法);同理,code 为 None 时调用 .encode 也会抛出 AttributeError。
可以使用更兼容的方式来获取字节流。
| def _kernel_file_name(code): | |
| """Return a stable kernel dump filename for the given MACA source.""" | |
| digest = hashlib.sha256(code.encode("utf-8")).hexdigest()[:16] | |
| return f"tvm_kernels_{digest}" | |
| def _kernel_file_name(code): | |
| """Return a stable kernel dump filename for the given MACA source.""" | |
| code_bytes = code if isinstance(code, bytes) else str(code or "").encode("utf-8") | |
| digest = hashlib.sha256(code_bytes).hexdigest()[:16] | |
| return f"tvm_kernels_{digest}" |
该 PR 为 MACA kernel dump 生成唯一输出路径,避免并发测试或多次运行互相覆盖调试产物。
这个修改面向沐曦 GPU 适配场景中比较容易影响开发、构建或验证稳定性的环节,把原来需要人工排查的问题前移到工具链、运行前检查或基准脚本中处理。实现上保持对现有默认行为的兼容,只在检测到明确配置、输入或环境异常时给出更直接的诊断,避免引入额外运行依赖,也方便维护者独立审阅该分支。
已在沐曦算力环境中完成对应分支验证,验证记录包含真实运行日志、命令输出和失败路径检查,本地归档目录为:E:/Documents/muxi/测试报告/mcTVM_new_toolchain_validation_20260608。提交分支:mengz/unique-maca-kernel-dumps,目标仓库:MetaX-MACA/mcTVM。