Skip to content

Commit f2be1fc

Browse files
committed
Add test verification workflow and safer generation
1 parent 8d23598 commit f2be1fc

21 files changed

Lines changed: 920 additions & 250 deletions

.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "autocode",
3-
"version": "0.6.0",
3+
"version": "0.7.0",
44
"description": "Claude Code plugin for competitive programming problem-setting workflows.",
55
"author": {
66
"name": "SummerOneTwo",

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.7.0] - 2026-04-27
9+
10+
### Features
11+
12+
- **source_path 直接编译**: 当使用 `source_path` 参数时,直接从原始文件编译,不再覆盖到标准位置。标准位置仍保留副本以供其他工具使用。所有构建工具返回 `canonical_path`(标准位置副本)和 `source_path`(实际编译源)。
13+
- **resolve_source() 公共函数**: 提取 5 个构建工具中的源码解析逻辑到 `mixins.py` 的 `resolve_source()` 函数和 `ResolvedSource` 数据类,消除约 100 行重复代码。
14+
- **name 参数**: `solution_build` 和 `solution_run` 新增 `name` 参数,支持自定义文件名(如 `name="brute_force"` 替代默认 `brute`)。
15+
- **sol_name / brute_name**: `stress_test_run` 新增 `sol_name` 和 `brute_name` 参数,支持查找自定义命名的解法二进制文件。
16+
- **output_dir 参数**: `problem_generate_tests` 新增 `output_dir` 参数,可指定测试数据输出目录(默认 `problem_dir/tests`)。
17+
- **extra_args 参数**: `stress_test_run`、`generator_run`、`problem_generate_tests` 的 `test_configs` 新增 `extra_args` 参数,支持传递自定义命令行参数给 generator。协议扩展为 `gen.exe <seed> <type> <n_min> <n_max> <t_min> <t_max> [extra_args...]`。
18+
- **types 参数**: `stress_test_run` 新增 `types` 参数,支持在对拍中循环使用多种生成策略(如 `["1","2","3","4"]`)。
19+
- **problem_verify_tests 工具**: 新增测试数据验证工具,检查文件配对、答案一致性(重新运行 sol)、validator 验证、无空文件等。
20+
- **stress_test_run 统计信息**: 对拍通过/失败时返回详细统计,包括 sol/brute 运行时间分布、N 值分布、最慢轮次等。
21+
- **构建结果透明度**: 所有构建工具返回 `binary_size` 和 `canonical_path`,`source_path` 返回实际编译源文件路径。
22+
23+
### Improvements
24+
25+
- **smart mode 文档**: `problem_generate_tests` 的 `constraints` 参数说明更明确,返回 `effective_test_configs` 展示实际使用的配置。
26+
- **workflow_guard 自定义命名**: `infer_state()` 支持自定义解法文件名(前缀匹配),新增 `tests_verified` 状态字段。
27+
- **工作流步骤更新**: 新增 `problem_verify_tests(passed)` 步骤,位于 `problem_generate_tests` 和 `problem_pack_polygon` 之间。
28+
829
## [0.6.0] - 2026-04-25
930

1031
### Features

CLAUDE.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ AutoCode/
6666
| stress_test_run | 压力测试 |
6767
| problem_create | 初始化题目 |
6868
| problem_generate_tests | 生成测试数据 |
69+
| problem_verify_tests | 验证测试数据质量 |
6970
| problem_validate | 验证题面样例 |
7071
| problem_pack_polygon | 打包为 Polygon 格式 |
7172

@@ -102,7 +103,8 @@ AutoCode/
102103
6. 运行压力测试 (`stress_test_run`, completed_rounds == total_rounds)
103104
7. 按需构建检查器 (`checker_build`, accuracy >= 0.9)
104105
8. 生成测试数据 (`problem_generate_tests`, generated_test_count > 0)
105-
9. 打包 Polygon (`problem_pack_polygon`)
106+
9. 验证测试数据 (`problem_verify_tests`, passed)
107+
10. 打包 Polygon (`problem_pack_polygon`)
106108

107109
该顺序会被 [hooks/hooks.json](/c:/userProgram/program/AutoCode/hooks/hooks.json) 和 [scripts/workflow_guard.py](/c:/userProgram/program/AutoCode/scripts/workflow_guard.py) 实际强制执行。
108110

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "autocode-mcp"
3-
version = "0.6.0"
3+
version = "0.7.0"
44
description = "MCP Server for competitive programming problem creation, based on AutoCode paper"
55
readme = "README.md"
66
requires-python = ">=3.10"

scripts/workflow_guard.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@ def state_file(problem_dir: str) -> Path:
3636

3737
def infer_state(problem_dir: str) -> dict[str, Any]:
3838
root = Path(problem_dir)
39+
solutions_dir = root / "solutions"
3940
return {
4041
"problem_dir": str(root),
4142
"created": root.exists() and (root / "files").exists() and (root / "solutions").exists(),
42-
"sol_built": (root / "solutions" / "sol.cpp").exists() or any(root.glob("solutions/sol.*")),
43-
"brute_built": (root / "solutions" / "brute.cpp").exists() or any(root.glob("solutions/brute.*")),
43+
"sol_built": _has_solution(solutions_dir, "sol"),
44+
"brute_built": _has_solution(solutions_dir, "brute"),
4445
"validator_ready": (root / "files" / "val.cpp").exists() or any(root.glob("files/val.*")),
4546
"validator_accuracy": None,
4647
"generator_built": (root / "files" / "gen.cpp").exists() or any(root.glob("files/gen.*")),
@@ -54,10 +55,25 @@ def infer_state(problem_dir: str) -> dict[str, Any]:
5455
"validation_passed": False,
5556
"tests_generated": any((root / "tests").glob("*.in")) if (root / "tests").exists() else False,
5657
"generated_test_count": len(list((root / "tests").glob("*.in"))) if (root / "tests").exists() else 0,
58+
"tests_verified": False,
5759
"packaged": (root / "problem.xml").exists(),
5860
}
5961

6062

63+
def _has_solution(solutions_dir: Path, prefix: str) -> bool:
64+
"""检查 solutions/ 下是否有指定前缀的解法文件(支持自定义命名)。"""
65+
if not solutions_dir.exists():
66+
return False
67+
# 精确匹配(如 sol.cpp, brute.cpp)
68+
if (solutions_dir / f"{prefix}.cpp").exists():
69+
return True
70+
# 前缀匹配(如 brute_force.cpp)
71+
for f in solutions_dir.iterdir():
72+
if f.is_file() and f.stem.startswith(prefix) and f.suffix == ".cpp":
73+
return True
74+
return False
75+
76+
6177
def load_state(problem_dir: str) -> dict[str, Any]:
6278
path = state_file(problem_dir)
6379
if path.exists():
@@ -244,7 +260,8 @@ def session_start() -> int:
244260
"stress_test_run(completed_rounds == total_rounds) -> "
245261
"checker_build if needed (accuracy >= 0.9) -> "
246262
"problem_validate(validation_passed) -> "
247-
"problem_generate_tests(generated_test_count > 0) -> problem_pack_polygon. "
263+
"problem_generate_tests(generated_test_count > 0) -> "
264+
"problem_verify_tests(passed) -> problem_pack_polygon. "
248265
"If a hook blocks a step, complete the missing prerequisite instead of retrying blindly."
249266
)
250267
print(

src/autocode_mcp/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"""
77
import os
88

9-
__version__ = "0.6.0"
9+
__version__ = "0.7.0"
1010

1111
# 获取 templates 目录路径(包内目录)
1212
_PACKAGE_DIR = os.path.dirname(__file__)

src/autocode_mcp/server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
MCP Server 入口。
33
4-
提供 15 个原子工具,基于 AutoCode 论文框架。
4+
提供 17 个原子工具,基于 AutoCode 论文框架。
55
"""
66

77
from __future__ import annotations
@@ -35,6 +35,7 @@
3535
from .tools.problem import ProblemCreateTool, ProblemGenerateTestsTool, ProblemPackPolygonTool
3636
from .tools.solution import SolutionBuildTool, SolutionRunTool
3737
from .tools.stress_test import StressTestRunTool
38+
from .tools.test_verify import ProblemVerifyTestsTool
3839
from .tools.validation import ProblemValidateTool
3940
from .tools.validator import ValidatorBuildTool, ValidatorSelectTool
4041

@@ -67,6 +68,7 @@ def register_all_tools() -> None:
6768
# Problem 工具组
6869
register_tool(ProblemCreateTool())
6970
register_tool(ProblemGenerateTestsTool())
71+
register_tool(ProblemVerifyTestsTool())
7072
register_tool(ProblemPackPolygonTool())
7173
register_tool(ProblemValidateTool())
7274

src/autocode_mcp/tools/checker.py

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from ..utils.compiler import run_binary_with_args
1212
from ..utils.platform import get_exe_extension
1313
from .base import Tool, ToolResult
14-
from .mixins import BuildToolMixin
14+
from .mixins import BuildToolMixin, resolve_source
1515

1616

1717
class CheckerBuildTool(Tool, BuildToolMixin):
@@ -91,58 +91,44 @@ async def execute(
9191
compiler: str = "g++",
9292
) -> ToolResult:
9393
"""执行 Checker 构建。"""
94-
# 解析源代码:source_path 优先于 code
95-
source_dir = None
96-
if source_path:
97-
if not os.path.isabs(source_path):
98-
source_path = os.path.join(problem_dir, source_path)
99-
if not os.path.exists(source_path):
100-
return ToolResult.fail(f"Source file not found: {source_path}")
101-
try:
102-
with open(source_path, encoding="utf-8") as f:
103-
code = f.read()
104-
except UnicodeDecodeError:
105-
try:
106-
with open(source_path, encoding="latin-1") as f:
107-
code = f.read()
108-
except Exception as e:
109-
return ToolResult.fail(f"Failed to read source file: {e}")
110-
source_dir = os.path.dirname(os.path.abspath(source_path))
111-
elif code is None:
112-
return ToolResult.fail("Either 'code' or 'source_path' must be provided")
94+
resolved, err = resolve_source(problem_dir, code, source_path)
95+
if resolved is None:
96+
return err
11397

11498
os.makedirs(problem_dir, exist_ok=True)
115-
116-
# 保存到 files/ 子目录
11799
files_dir = os.path.join(problem_dir, "files")
118100
os.makedirs(files_dir, exist_ok=True)
119101

120-
# 保存代码
121-
source_path = os.path.join(files_dir, "checker.cpp")
102+
canonical_path = os.path.join(files_dir, "checker.cpp")
122103
try:
123-
with open(source_path, "w", encoding="utf-8") as f:
124-
f.write(code)
104+
with open(canonical_path, "w", encoding="utf-8") as f:
105+
f.write(resolved.code)
125106
except Exception as e:
126107
return ToolResult.fail(f"Failed to save code: {str(e)}")
127108

128-
# 编译
129109
binary_path = os.path.join(files_dir, f"checker{get_exe_extension()}")
130110

131-
include_dirs = [source_dir] if source_dir else None
132-
compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs)
111+
compile_source = resolved.original_source_path or canonical_path
112+
include_dirs = [resolved.include_dir] if resolved.include_dir else None
113+
compile_result = await self.build(compile_source, binary_path, compiler=compiler, include_dirs=include_dirs)
133114

134115
if not compile_result.success:
135116
return ToolResult.fail(
136117
f"Compilation failed: {compile_result.error}",
137-
source_path=source_path,
118+
source_path=compile_source,
119+
canonical_path=canonical_path,
138120
compile_log=compile_result.stderr,
139121
)
140122

123+
binary_size = os.path.getsize(binary_path) if os.path.exists(binary_path) else 0
124+
141125
# 如果没有测试场景,直接返回成功
142126
if not test_scenarios:
143127
return ToolResult.ok(
144-
source_path=source_path,
128+
source_path=compile_source,
129+
canonical_path=canonical_path,
145130
binary_path=binary_path,
131+
binary_size=binary_size,
146132
compile_log=compile_result.stderr,
147133
message="Checker built successfully (no test scenarios provided)",
148134
)
@@ -214,8 +200,10 @@ async def execute(
214200
accuracy = correct_count / total if total > 0 else 0
215201

216202
return ToolResult.ok(
217-
source_path=source_path,
203+
source_path=compile_source,
204+
canonical_path=canonical_path,
218205
binary_path=binary_path,
206+
binary_size=binary_size,
219207
compile_log=compile_result.stderr,
220208
test_results=test_results,
221209
correct_count=correct_count,

src/autocode_mcp/tools/generator.py

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from ..utils.compiler import run_binary, run_binary_with_args
1313
from ..utils.platform import get_exe_extension
1414
from .base import Tool, ToolResult
15-
from .mixins import BuildToolMixin
15+
from .mixins import BuildToolMixin, resolve_source
1616

1717

1818
class GeneratorBuildTool(Tool, BuildToolMixin):
@@ -74,55 +74,43 @@ async def execute(
7474
compiler: str = "g++",
7575
) -> ToolResult:
7676
"""执行 Generator 构建。"""
77-
# 解析源代码:source_path 优先于 code
78-
source_dir = None
79-
if source_path:
80-
if not os.path.isabs(source_path):
81-
source_path = os.path.join(problem_dir, source_path)
82-
if not os.path.exists(source_path):
83-
return ToolResult.fail(f"Source file not found: {source_path}")
84-
try:
85-
with open(source_path, encoding="utf-8") as f:
86-
code = f.read()
87-
except UnicodeDecodeError:
88-
try:
89-
with open(source_path, encoding="latin-1") as f:
90-
code = f.read()
91-
except Exception as e:
92-
return ToolResult.fail(f"Failed to read source file: {e}")
93-
source_dir = os.path.dirname(os.path.abspath(source_path))
94-
elif code is None:
95-
return ToolResult.fail("Either 'code' or 'source_path' must be provided")
77+
resolved, err = resolve_source(problem_dir, code, source_path)
78+
if resolved is None:
79+
return err
9680

9781
os.makedirs(problem_dir, exist_ok=True)
98-
99-
# 保存到 files/ 子目录
10082
files_dir = os.path.join(problem_dir, "files")
10183
os.makedirs(files_dir, exist_ok=True)
10284

103-
source_path = os.path.join(files_dir, "gen.cpp")
85+
canonical_path = os.path.join(files_dir, "gen.cpp")
10486
try:
105-
with open(source_path, "w", encoding="utf-8") as f:
106-
f.write(code)
87+
with open(canonical_path, "w", encoding="utf-8") as f:
88+
f.write(resolved.code)
10789
except Exception as e:
10890
return ToolResult.fail(f"Failed to save code: {str(e)}")
10991

11092
exe_ext = get_exe_extension()
11193
binary_path = os.path.join(files_dir, f"gen{exe_ext}")
11294

113-
include_dirs = [source_dir] if source_dir else None
114-
compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs)
95+
compile_source = resolved.original_source_path or canonical_path
96+
include_dirs = [resolved.include_dir] if resolved.include_dir else None
97+
compile_result = await self.build(compile_source, binary_path, compiler=compiler, include_dirs=include_dirs)
11598

11699
if not compile_result.success:
117100
return ToolResult.fail(
118101
f"Compilation failed: {compile_result.error}",
119-
source_path=source_path,
102+
source_path=compile_source,
103+
canonical_path=canonical_path,
120104
compile_log=compile_result.stderr,
121105
)
122106

107+
binary_size = os.path.getsize(binary_path) if os.path.exists(binary_path) else 0
108+
123109
return ToolResult.ok(
124-
source_path=source_path,
110+
source_path=compile_source,
111+
canonical_path=canonical_path,
125112
binary_path=binary_path,
113+
binary_size=binary_size,
126114
compile_log=compile_result.stderr,
127115
message="Generator built successfully",
128116
)
@@ -207,6 +195,12 @@ def input_schema(self) -> dict:
207195
"description": "T 最大值",
208196
"default": 1,
209197
},
198+
"extra_args": {
199+
"type": "array",
200+
"items": {"type": "string"},
201+
"description": "附加命令行参数,追加在标准 6 参数之后传递给 generator",
202+
"default": [],
203+
},
210204
},
211205
"required": ["problem_dir", "strategies"],
212206
}
@@ -222,9 +216,11 @@ async def execute(
222216
n_max: int = 100000,
223217
t_min: int = 1,
224218
t_max: int = 1,
219+
extra_args: list[str] | None = None,
225220
) -> ToolResult:
226221
"""执行数据生成。"""
227222
exe_ext = get_exe_extension()
223+
extra_args = extra_args or []
228224

229225
# 检查 generator - 优先查找 files/ 子目录
230226
gen_exe = os.path.join(problem_dir, "files", f"gen{exe_ext}")
@@ -262,8 +258,8 @@ async def execute(
262258
type_param = strategy_type_map.get(strategy, "2")
263259

264260
# 运行 generator
265-
# gen.exe <seed> <type> <n_min> <n_max> <t_min> <t_max>
266-
cmd_args = [str(seed), type_param, str(n_min), str(n_max), str(t_min), str(t_max)]
261+
# gen.exe <seed> <type> <n_min> <n_max> <t_min> <t_max> [extra_args...]
262+
cmd_args = [str(seed), type_param, str(n_min), str(n_max), str(t_min), str(t_max)] + extra_args
267263

268264
gen_result = await run_binary_with_args(
269265
gen_exe,

0 commit comments

Comments
 (0)