Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions tools/run_benchmark_with_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""Run an mcoplib benchmark command and capture reproducible logs."""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path


def build_command(root: Path, op: str, extra_args: list[str]) -> list[str]:
    """Build the argv list that launches the benchmark script for one operator.

    Args:
        root: Repository root; the script is addressed as
            ``root / "benchmark" / "mcoplib_mxbenchmark_ops.py"``.
        op: Operator name, passed right after ``--op``.
        extra_args: Additional arguments appended verbatim after the operator.

    Returns:
        The command as a list of strings: interpreter, script path, ``--op``,
        the operator name, then every element of ``extra_args`` in order.
    """
    script = root / "benchmark" / "mcoplib_mxbenchmark_ops.py"
    # Launch with the interpreter that is running this wrapper.
    return [sys.executable, str(script), "--op", op, *extra_args]


def run_with_logs(root: Path, op: str, log_root: Path, extra_args: list[str]) -> int:
if not op or not all(ch.isalnum() or ch == "_" for ch in op):
print(
f"Error: Invalid operator name '{op}'. Only alphanumeric characters and underscores are allowed.",
file=sys.stderr,
)
return 1

benchmark_dir = root / "benchmark"
script_path = benchmark_dir / "mcoplib_mxbenchmark_ops.py"
if not script_path.is_file():
print(f"Error: Benchmark script not found at '{script_path}'", file=sys.stderr)
return 1

run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_{op}"
run_dir = log_root / run_id
run_dir.mkdir(parents=True, exist_ok=True)
Comment on lines +25 to +41

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

安全风险:路径穿越漏洞 (Path Traversal)

由于 --op 参数是由用户输入的,并且直接用于拼接生成 run_id 和 run_dir 路径:

run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_{op}"
run_dir = log_root / run_id

如果用户传入类似于 ../../etc 的恶意算子名称,可能会导致程序在预期的日志目录之外创建目录并写入 metadata.json 等文件。

改进建议:

  1. 限制 op 仅包含字母、数字和下划线,防止路径穿越。
  2. 在创建日志目录之前,先校验基准测试脚本 mcoplib_mxbenchmark_ops.py 是否存在,避免在路径不正确时创建空的/脏的日志目录。
def run_with_logs(root: Path, op: str, log_root: Path, extra_args: list[str]) -> int:
    if not op or not all(c.isalnum() or c == "_" for c in op):
        print(f"Error: Invalid operator name '{op}'. Only alphanumeric characters and underscores are allowed.", file=sys.stderr)
        return 1

    benchmark_dir = root / "benchmark"
    script_path = benchmark_dir / "mcoplib_mxbenchmark_ops.py"
    if not script_path.is_file():
        print(f"Error: Benchmark script not found at '{script_path}'", file=sys.stderr)
        return 1

    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_{op}"
    run_dir = log_root / run_id
    run_dir.mkdir(parents=True, exist_ok=True)


command = build_command(root, op, extra_args)
metadata = {
"generated_at": datetime.now(timezone.utc).isoformat(),
"root": str(root),
"op": op,
"command": command,
"environment": {
"MACA_PATH": os.environ.get("MACA_PATH"),
"CUDA_HOME": os.environ.get("CUDA_HOME"),
"LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH"),
"PYTHONPATH": os.environ.get("PYTHONPATH"),
"CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES"),
"MACA_VISIBLE_DEVICES": os.environ.get("MACA_VISIBLE_DEVICES"),
},
Comment on lines +49 to +56

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

可维护性与可复现性改进:记录关键 GPU 环境变量

在进行算子基准测试时,CUDA_VISIBLE_DEVICES 和 MACA_VISIBLE_DEVICES 决定了具体使用哪张 GPU 卡,而 PYTHONPATH 决定了 Python 模块的搜索路径。这些环境变量对于复现测试结果和排查问题至关重要。

建议在 metadata.json 的 environment 中补充记录这些关键环境变量。

        "environment": {
            "MACA_PATH": os.environ.get("MACA_PATH"),
            "CUDA_HOME": os.environ.get("CUDA_HOME"),
            "LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH"),
            "PYTHONPATH": os.environ.get("PYTHONPATH"),
            "CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES"),
            "MACA_VISIBLE_DEVICES": os.environ.get("MACA_VISIBLE_DEVICES"),
        },

}
(run_dir / "metadata.json").write_text(
json.dumps(metadata, indent=2, ensure_ascii=False),
encoding="utf-8",
)

with (run_dir / "stdout.log").open("w", encoding="utf-8") as out, (
run_dir / "stderr.log"
).open("w", encoding="utf-8") as err:
proc = subprocess.run(command, cwd=benchmark_dir, stdout=out, stderr=err, text=True)

(run_dir / "exit_code.txt").write_text(str(proc.returncode) + "\n", encoding="utf-8")
print(f"Benchmark logs written to: {run_dir}")
return proc.returncode


def main() -> int:
    """Parse the command line and run one benchmark with structured logs.

    Arguments this wrapper does not define are collected by
    ``parse_known_args`` and handed on unchanged as the benchmark's extra
    arguments.

    Returns:
        The integer returned by ``run_with_logs``.
    """
    parser = argparse.ArgumentParser(
        description="Run mcoplib benchmark with structured logs.",
        # Unknown flags are forwarded to the benchmark script. With prefix
        # matching enabled, a forwarded flag that is a prefix of one of our
        # own options (e.g. ``--log``) would be consumed here instead.
        allow_abbrev=False,
    )
    parser.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).resolve().parents[1],
        help="Repository root that contains the benchmark/ directory.",
    )
    parser.add_argument(
        "--op",
        required=True,
        help="Name of the operator to benchmark.",
    )
    parser.add_argument(
        "--log-root",
        type=Path,
        default=Path("benchmark_logs"),
        help="Directory under which the per-run log directory is created.",
    )
    args, extra = parser.parse_known_args()

    return run_with_logs(args.root.resolve(), args.op, args.log_root.resolve(), extra)


if __name__ == "__main__":
    raise SystemExit(main())
29 changes: 29 additions & 0 deletions unit_test/test_run_benchmark_with_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import sys
import tempfile
import unittest
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from tools.run_benchmark_with_logs import build_command, run_with_logs


class RunBenchmarkWithLogsTest(unittest.TestCase):
    """Tests for command construction and operator-name validation."""

    def test_build_command_forwards_extra_args(self):
        """The command targets the benchmark script and keeps extras in order."""
        root = Path("/repo")
        command = build_command(root, "rms_norm", ["--generate", "--csv", "out.csv"])

        self.assertEqual(command[0], sys.executable)
        self.assertEqual(Path(command[1]), root / "benchmark" / "mcoplib_mxbenchmark_ops.py")
        self.assertEqual(command[2:], ["--op", "rms_norm", "--generate", "--csv", "out.csv"])

    def test_rejects_operator_names_with_path_traversal(self):
        """An operator name containing path separators is refused with code 1."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            log_root = root / "logs"

            code = run_with_logs(root, "../../etc", log_root, [])

            self.assertEqual(code, 1)
            # The rejection must happen before any log directory is created.
            self.assertFalse(log_root.exists())


if __name__ == "__main__":
    unittest.main()