diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 608ff6b..81a460d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y cmake g++-13 ccache + sudo apt-get install -y cmake g++-13 ccache clang-format - name: Restore ccache uses: actions/cache@v4 @@ -44,6 +44,13 @@ jobs: - name: Test floor gates (count floor + new-applet-must-have-tests) run: bash tests/check_test_floor.sh + - name: Structure gates (banned-pattern + layering) + run: bash tests/check_structure_gates.sh + + - name: clang-format dry-run (advisory — version skew until toolchain pinned) + continue-on-error: true + run: clang-format --dry-run --Werror $(find src include tests -name '*.cpp' -o -name '*.hpp') + - name: Upload test results if: always() uses: actions/upload-artifact@v4 diff --git a/CLAUDE.md b/CLAUDE.md index 3bcc52c..28e2449 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,6 +8,8 @@ C++23 现代 BusyBox 替代品(单二进制,123 applet,399 GTest,418 KB - [document/ai/PLAN.md](document/ai/PLAN.md) — 当前焦点批级进度(批级,最易变) - [document/ai/CODING-TASTE.md](document/ai/CODING-TASTE.md) — 编码/注释风格权威(写代码前读) - [document/ai/COVERAGE.md](document/ai/COVERAGE.md) — 测试/正确性/差分标尺(覆盖率怎么算、怎么防退化,季级) +- [document/ai/STRUCTURE-TASTE.md](document/ai/STRUCTURE-TASTE.md) — 结构与工艺标尺(职责/DRY/边界/机械护栏,季级) +- [document/ai/PERFORMANCE.md](document/ai/PERFORMANCE.md) — 性能标尺(wall-clock 不动输出/4步闭环/Phase0 基建,季级) - [document/notes/](document/notes/) — 批级工作记录(`-.md`) ## 始终遵守(每条便宜,违规代价大) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8568c03..1faad5e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,4 +85,28 @@ if(GTest_ADDED) gtest_discover_tests(cfbox_tests) endif() endif() + +# ── google-benchmark for perf micro-benchmarks (see document/ai/PERFORMANCE.md) ── +# Opt-in: -DCFBOX_ENABLE_BENCHMARK=ON with Release -O2. Doesn't fetch unless enabled. 
+option(CFBOX_ENABLE_BENCHMARK "Build perf micro-benchmarks (Release -O2)" OFF) +if(CFBOX_ENABLE_BENCHMARK) +CPMAddPackage( + NAME benchmark + GITHUB_REPOSITORY google/benchmark + GIT_TAG v1.9.0 + OPTIONS "BENCHMARK_ENABLE_TESTING OFF" "BENCHMARK_ENABLE_WERROR OFF" "BENCHMARK_ENABLE_INSTALL OFF" # third-party: no self-tests, no -Werror, no install rules +) +if(benchmark_ADDED) + file(GLOB_RECURSE CFBOX_BENCH_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tests/benchmark/*.cpp") + if(CFBOX_BENCH_SOURCES) + add_executable(cfbox_bench ${CFBOX_BENCH_SOURCES} ${CFBOX_APPLET_SOURCES}) + target_include_directories(cfbox_bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include") # executable: nothing consumes its usage requirements + target_include_directories(cfbox_bench PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/include") + target_link_libraries(cfbox_bench PRIVATE + cfbox_compiler_flags + benchmark::benchmark_main + ) + endif() +endif() +endif() endif() diff --git a/document/ai/DIRECTIVES.md b/document/ai/DIRECTIVES.md index 76f7ea0..f82c0a0 100644 --- a/document/ai/DIRECTIVES.md +++ b/document/ai/DIRECTIVES.md @@ -21,6 +21,8 @@ - 注释一律英文;机械风格以 [.clang-format](../../.clang-format) 为准,跑 clang-format 不手调。 - Result:`auto v = CFBOX_TRY(expr)`(宏展开判 `!v` 后 `return std::unexpected`);成功 `return value;` / `Result` 用 `return {};`;失败 `return std::unexpected(base::make_error(code, msg));`。 - 测试 / 覆盖率 / 差分(对照)正确性标尺见 [COVERAGE.md](COVERAGE.md):三种"覆盖率"语义、对照测试(标准答案)、测试数量只升不降。 +- 结构与工艺标尺见 [STRUCTURE-TASTE.md](STRUCTURE-TASTE.md):职责/DRY/抽象边界/控制流纪律 + 机械护栏(禁裸 fopen、stoi/stol、不安全 C 函数;layering;clang-format 软门)。 +- 性能标尺见 [PERFORMANCE.md](PERFORMANCE.md):只动 wall-clock 不动输出、4 步闭环、Phase-0 测量基建(google-benchmark via CPM)先行。 ## C. 
操作模型(长期,Claude 主力开发) diff --git a/document/ai/PERFORMANCE.md b/document/ai/PERFORMANCE.md new file mode 100644 index 0000000..65c077b --- /dev/null +++ b/document/ai/PERFORMANCE.md @@ -0,0 +1,45 @@ +# CFBox 性能 标尺 + +> 这份文档讲性能优化的规矩——什么该优化、怎么测、怎么保证改完没改坏输出。大概一个季度才动一次。放在 `document/ai/`,从 [CLAUDE.md](../../CLAUDE.md) 和 [DIRECTIVES.md](DIRECTIVES.md) 指过来,和 [COVERAGE.md](COVERAGE.md)、[STRUCTURE-TASTE.md](STRUCTURE-TASTE.md) 平级。 +> +> 一句话:性能优化的对象是"跑得快"(宿主 wall-clock),不是"算出不同的结果"——**cfbox 的输出字节 + 退出码在优化前后必须逐字节不变**,靠对照测试兜底(见 [COVERAGE.md](COVERAGE.md))。 + +## 一、核心立场(三条) + +1. **只动 wall-clock,绝不动输出**。优化 sed/grep/sort/md5sum 这些命令的吞吐时,输出必须和优化前(以及和 BusyBox/GNU oracle)逐字节一致。任何"优化"若改变了输出,就是 bug。 +2. **wall-clock 和 cycle-accuracy 是两根轴,不串**。CFBox 是宿主命令工具,没有 cycle-accurate 模拟器,只量真实 wall-clock;不要把"指令数"伪装成"用户体验"。 +3. **依赖原则**:cfbox **二进制实现**零外部依赖(手写 deflate/POSIX regex,保持小体积);但**基建/测试用权威标准工具优先**——测试用 GTest、性能用 google-benchmark,都经 CPM 拉入(CPM 已经在用)。 + +## 二、每次性能改动走 4 步闭环 + +1. **构造真热路径场景**:拿真实大输入(大文件 grep/cat/sort、大目录 ls/find、大 tar、大 md5sum)来测,不写"空转微循环"那种白赢基准。 +2. **对抗验保真**(两靶子): + - **场景保真**:基准真在跑那条热路径吗?没退化成"编译器一把梭"的常量循环吗? + - **测量可信**:方差/噪声多大?归因到对的代码了吗?编译器有没有把工作优化掉(看汇编/反汇编证伪)? +3. **确认 bench 提升 AND 正确性不变**:吞吐有可测提升,**且**对照 harness + GTest + 集成全绿。 +4. 
**纳入防退化**:把这条热路径的基准固化进 bench 套件,以后回归自动盯。 + +## 三、测量基建(Phase 0,优先于一切优化) + +现在**完全没有**性能基建,先建: + +- **bench 框架 = google-benchmark(经 CPMAddPackage,参照现有 GTest 用法)**。做**进程内微基准**:cfbox 的 `*_main(int, char**)` 入口可在进程内直接调(单测已经在这么干),bench 复用这个接缝,量热路径算法本身(排除进程启动开销)。 +- **构建档 = Release `-O2`**([CompilerFlag.cmake](../../cmake/compile/CompilerFlag.cmake) 已有,绝不 bench Debug/无优化构建)。 +- **端到端 wall-clock**:另配一个 shell 脚本,对比 cfbox vs `/usr/bin/$cmd`(coreutils)在大样本上的耗时,贴近真实用户体验;复用对照 harness 的 fixture。 +- **编译期 perf 计数器宏**:**暂缓**(在没有内测驱动前是死插桩;perf/callgrind 这种现成 profiler 对单二进制已能给源码级热点归因)。 +- **防退化门 = advisory**(容差带 + 中位数,CI 只报告不挡):bench 噪声和 CI 机器方差大,hard 门会误报;稳定后再考虑。 + +## 四、热路径清单(hunting 已确认,按收益排,附录) + +- **sed 每行重编译正则**([sed.cpp:229-262](../../src/applets/sed.cpp#L229-L262))— CPU-bound,high。修法:parse 阶段编译一次存进 SedCommand。 +- **md5sum 整文件 2× 内存 + 非流式**([checksum.hpp:73-81](../../include/cfbox/checksum.hpp#L73-L81))— 大文件 high。修法:md5 加 update/finalize 增量 API。 +- **tar -c 整归档缓存在一个 string**([tar.cpp:119-147](../../src/applets/tar.cpp#L119-L147))— high(条件)。修法:流式逐成员写。 +- **cmp 双文件全量载入,首差异不早退**([cmp.cpp:33-50](../../src/applets/cmp.cpp#L33-L50))— medium。修法:双缓冲块读 + memcmp。 +- **io::for_each_line 逐字符 fgetc**([io.hpp:137](../../include/cfbox/io.hpp#L137))— 实测约 10× 慢于块读,medium(公共骨架)。修法:块 fread + memchr 找换行。 +- **head/tail/sort/uniq 先 read_all 再 split** — medium。修法:head 流式早退、tail 反向 seek。 + +## 五、执行批次(标尺确认后,每批 propose-then-execute,全 behavior-preserving) + +- **批 0(基建)**:CPM 拉 google-benchmark + 加 `tests/benchmark/` 目标(Release -O2,进程内微基准)+ 端到端 wall-clock 脚本 + CI advisory 门。建立基线数字。 +- **批 1(演示闭环)**:sed 每行重编译 → 修,bench 证提升 + 对照 oracle 证输出不变 + 进防退化。 +- **批 2+**:md5sum 流式、tar 流式、cmp 早退、for_each_line 块读……每条都走 4 步闭环。 diff --git a/document/ai/STRUCTURE-TASTE.md b/document/ai/STRUCTURE-TASTE.md new file mode 100644 index 0000000..6f363d8 --- /dev/null +++ b/document/ai/STRUCTURE-TASTE.md @@ -0,0 +1,59 @@ +# CFBox 结构与工艺 标尺 + +> 这份文档讲代码结构与工艺原则——怎么让代码"好读、好改、不容易在改动时引入 bug"。大概一个季度才动一次。放在 `document/ai/` 下,从 
[CLAUDE.md](../../CLAUDE.md) 和 [DIRECTIVES.md](DIRECTIVES.md) 指过来,和 [CODING-TASTE.md](CODING-TASTE.md)(管命名/格式那种微观风格)、[COVERAGE.md](COVERAGE.md) 平级。 +> +> 一句话:微观风格交给 clang-format 和 CODING-TASTE;这份管"函数多大、逻辑有没有重复、抽象边界有没有串"这种结构性的事。 + +## 一、七条结构与工艺原则(每条配 CFBox 真实例子) + +1. **一个函数干一件事、停在一个抽象层次**。规模超限往往是职责堆叠的症状。 + - 反例:tail.cpp(436 行)单文件混了"静态 tail + -f 跟随 + 信号处理"三件事;io::for_each_line 把"逐字符读"和"尾行边界处理"混在一层。 +2. **DRY——逻辑唯一归属,第二份出现就抽**。 + - 反例:用户名→名字解析(getpwuid 失败回退)在 ls/stat/id/whoami 写了 4 份,且回退策略不一致(stat 回退空串、其余回退数字);diff.cpp 的 hunk 行号统计整段复制两遍;sed.cpp 的分隔段抽取循环重复;sh_main 手写 fread 循环重复了 io::read_all。 +3. **抽象边界不串——通用层不长出应用语义**。 + - 反例:stream 层的 run_processor 自己拼 "cfbox:" 错误信息(应用语义泄漏进通用流层);fs::for_each_entry 静默吞掉迭代错误。 +4. **显式优先 / 数据驱动 dispatch——优先级是语义时做成有序表,别藏进 if-else 物理顺序**。 + - ✅ 正面:APPLET_REGISTRY 是 constexpr 数据表(分发靠查表,不是 if-else 链);find/test 用递归下降解析器,优先级做进语法结构。 +5. **可测试性是设计约束——留可独立测试的接缝**。 + - ✅ 正面:test_capture.hpp 在进程内直接调 `_main`,等于一个轻量 harness;反面:LineProcessor 这个虚基类被引入却零引用(死抽象)。 +6. **最简表达——别写"算恒等"的冗余代码**。 + - 反例:io::for_each_line 尾部两个 constexpr 分支体逐字相同;ErrorView 类型只在单测里用、生产零引用(YAGNI 死代码)。 +7. 
**控制流纪律——嵌套≤3 层;一行 if 也用 {}**。 + - 反例:个别函数嵌套 >3 层(diff 的 build_hunks)。整体偏干净。 + +## 二、机械护栏(脚本可查、版本无关的进 CI 当硬门;版本敏感的做软门) + +**硬门(CI exit 非零,精确匹配函数调用、不匹配注释/标识符):** + +- **禁不安全 C 函数**:`sprintf(`/`strcpy(`/`strcat(`/`gets(` → 用 `snprintf` / `std::string` / `std::string_view` 替代。现状:0 处真违规(历史 grep 命中全是注释/awk 函数名误报)。 +- **禁裸 `std::fopen`(在 applets 里)**:文本/二进制读写走 `cfbox::io::open_file`(带 Result + RAII)。现状:15 处,本维度清掉。 +- **禁 `std::stoi`/`std::stol`**:项目 -fno-exceptions,这俩在非法输入上抛异常→std::terminate(崩溃)。改 `std::strtol`+errno 或 `std::from_chars` + CFBOX_ERR 干净报错。现状:19 处(全是命令行数值参数解析),本维度清掉。 +- **layering 门**:applets 不自造递归目录遍历,用 `cfbox::fs::for_each_entry` 或加注释豁免。现状:5 处,grandfather 进基线清单,新加的裸奔才挡。 + +**软门(advisory,本地 + IDE;不上 CI Werror,直到 CI pin 死工具链版本):** + +- **clang-format dry-run**:CI 加一步检查(先只报告;CODING-TASTE 说"CI 也跑",现状没跑——补上)。 +- **clang-tidy**(含 `readability-function-size` 行数门、`readability-braces-around-statements` 等):版本敏感,advisory。 + +## 三、每次改结构,走这三步 + +1. 先看牵连:grep 被改抽象的引用方(`cfbox::fs::`、`cfbox::io::`、被改头文件的 include 方)。 +2. 行为不变:结构改动不得改变可观测输出——靠 GTest + 集成 + 对照测试兜底(见 [COVERAGE.md](COVERAGE.md))。 +3. 
收尾同步:改完跑 clang-format;结构性决策记进 [document/notes/](../notes/)。 + +## 四、附录:CFBox 现状(2026-06-28 勘查) + +- **死抽象**:`stream::LineProcessor`/`run_processor`([stream.hpp:57-78](../../include/cfbox/stream.hpp#L57-L78),0 引用)、`base::ErrorView`([error.hpp:14-17](../../include/cfbox/error.hpp#L14-L17),生产零引用)。 +- **DRY 债**:owner 名字解析 4 份回退不一致;diff hunk 统计复制两遍;sed 段抽取重复;sh_main 重复 read_all。 +- **banned-pattern**:裸 fopen 15 处、stoi/stol 19 处、不安全 C 函数 0 处真违规。 +- **layering**:5 个 applet 自造递归遍历(grep/find/tar/du/sysctl)。 +- **微观风格门缺失**:CI 没 clang-format/clang-tidy 步骤。 +- **大文件**(职责堆叠候选,单文件):tail.cpp 436、sed.cpp 368、ls.cpp 336、find.cpp 313。(sh/awk 是多文件按职责拆分,大但合理。) + +## 五、执行批次(标尺确认后,每批 propose-then-execute,全行为保持) + +- **批1(零风险)**:删死抽象 LineProcessor/run_processor + ErrorView(+ 其单测)。0 引用,体积小降。 +- **批2**:清 19 处 stoi/stol → strtol/from_chars + CFBOX_ERR(修潜在崩溃)。 +- **批3**:清 15 处裸 fopen → io::open_file / read_all / for_each_line。 +- **批4**:DRY 收敛——owner 名字解析(统一回退)、diff hunk、sed 段抽取。 +- **批5**:上机械护栏脚本(banned-pattern 精确匹配 + layering grandfather)+ clang-format dry-run,接进 CI。 diff --git a/include/cfbox/args.hpp b/include/cfbox/args.hpp index a7ef3de..b45f362 100644 --- a/include/cfbox/args.hpp +++ b/include/cfbox/args.hpp @@ -1,11 +1,17 @@ #pragma once +#include +#include +#include #include #include +#include #include #include #include +#include + namespace cfbox::args { struct OptSpec { @@ -156,4 +162,21 @@ inline auto parse(int argc, char* argv[], return result; } +// Parse s as a base-10 int with no throw (the project is -fno-exceptions, so std::stoi +// would std::terminate on bad input). Rejects empty / non-numeric / trailing junk / +// out-of-int-range; callers report via CFBOX_ERR on the unexpected path. +// Uses strtol () rather than from_chars (): args.hpp is included +// by every applet, and is a notoriously heavy header that blew up +// per-TU compile memory enough to OOM the parallel cross-compile builds. 
+[[nodiscard]] inline auto parse_int(std::string_view s) -> base::Result { + std::string tmp{s}; + char* end = nullptr; + errno = 0; + long v = std::strtol(tmp.c_str(), &end, 10); + if (errno != 0 || end == tmp.c_str() || *end != '\0' || v < INT_MIN || v > INT_MAX) { + return std::unexpected(base::Error{EINVAL, "not a valid integer: '" + std::string{s} + '\''}); + } + return static_cast(v); +} + } // namespace cfbox::args diff --git a/include/cfbox/checksum.hpp b/include/cfbox/checksum.hpp index fc7bf5c..ba1b90b 100644 --- a/include/cfbox/checksum.hpp +++ b/include/cfbox/checksum.hpp @@ -1,11 +1,11 @@ #pragma once +#include #include #include #include #include #include -#include namespace cfbox::checksum { @@ -36,100 +36,152 @@ inline auto md5_to_hex(const MD5Hash& hash) -> std::string { return result; } -inline auto md5(std::string_view data) -> MD5Hash { - MD5Hash result; - - static constexpr std::uint32_t K[64] = { - 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, - 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, - 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, - 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, - 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, - 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, - 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, - 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, - 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, - 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, - 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, - 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, - 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, - 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, - 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, - 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, - }; - - static constexpr unsigned s[64] = { - 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, - 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, - 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, - 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 
15, 21, 6, 10, 15, 21, - }; - - auto left_rotate = [](std::uint32_t x, unsigned c) -> std::uint32_t { - return (x << c) | (x >> (32 - c)); - }; - - // Padding - std::size_t orig_len = data.size(); - auto bit_len = static_cast(orig_len) * 8; - std::size_t padded_len = ((orig_len + 8) / 64 + 1) * 64; - std::vector msg(padded_len, 0); - std::memcpy(msg.data(), data.data(), orig_len); - msg[orig_len] = 0x80; - for (int j = 0; j < 8; ++j) { - msg[padded_len - 8 + j] = static_cast(bit_len >> (j * 8)); +namespace detail { + +inline constexpr std::uint32_t k_md5_K[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; + +inline constexpr unsigned k_md5_s[64] = { + 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, + 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, + 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, + 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, +}; + +inline auto md5_left_rotate(std::uint32_t x, unsigned c) -> std::uint32_t { + return (x << c) | (x >> (32 - c)); +} + +// Compress one 64-byte block into the running state. 
+inline auto md5_process(std::uint32_t& a0, std::uint32_t& b0, std::uint32_t& c0, + std::uint32_t& d0, const std::uint8_t* p) -> void { + std::uint32_t M[16]; + for (std::size_t j = 0; j < 16; ++j) { + auto base = j * 4; + M[j] = static_cast(p[base]) + | (static_cast(p[base + 1]) << 8) + | (static_cast(p[base + 2]) << 16) + | (static_cast(p[base + 3]) << 24); } - std::uint32_t a0 = 0x67452301; - std::uint32_t b0 = 0xEFCDAB89; - std::uint32_t c0 = 0x98BADCFE; - std::uint32_t d0 = 0x10325476; - - for (std::size_t offset = 0; offset < padded_len; offset += 64) { - std::uint32_t M[16]; - for (std::size_t j = 0; j < 16; ++j) { - auto base = offset + j * 4; - M[j] = static_cast(msg[base]) - | (static_cast(msg[base + 1]) << 8) - | (static_cast(msg[base + 2]) << 16) - | (static_cast(msg[base + 3]) << 24); + std::uint32_t A = a0, B = b0, C = c0, D = d0; + + for (int j = 0; j < 64; ++j) { + std::uint32_t F; + int idx; + if (j < 16) { + F = (B & C) | (~B & D); + idx = j; + } else if (j < 32) { + F = (D & B) | (~D & C); + idx = (5 * j + 1) % 16; + } else if (j < 48) { + F = B ^ C ^ D; + idx = (3 * j + 5) % 16; + } else { + F = C ^ (B | ~D); + idx = (7 * j) % 16; } + F = F + A + k_md5_K[j] + M[idx]; + A = D; + D = C; + C = B; + B = B + md5_left_rotate(F, k_md5_s[j]); + } - std::uint32_t A = a0, B = b0, C = c0, D = d0; - - for (int j = 0; j < 64; ++j) { - std::uint32_t F; - int idx; - if (j < 16) { - F = (B & C) | (~B & D); - idx = j; - } else if (j < 32) { - F = (D & B) | (~D & C); - idx = (5 * j + 1) % 16; - } else if (j < 48) { - F = B ^ C ^ D; - idx = (3 * j + 5) % 16; - } else { - F = C ^ (B | ~D); - idx = (7 * j) % 16; - } - F = F + A + K[j] + M[idx]; - A = D; - D = C; - C = B; - B = B + left_rotate(F, s[j]); - } + a0 += A; b0 += B; c0 += C; d0 += D; +} + +} // namespace detail + +// Incremental MD5 — O(1) scratch memory. Feed data in chunks via update(), then +// call finalize(). 
Replaces the old one-shot md5() that padded the whole input +// into a second buffer (2× memory + had to read it all before hashing). +struct MD5 { + std::uint32_t a0 = 0x67452301, b0 = 0xEFCDAB89, c0 = 0x98BADCFE, d0 = 0x10325476; + std::uint64_t total_bytes = 0; + std::array buf{}; + unsigned buf_len = 0; - a0 += A; b0 += B; c0 += C; d0 += D; + auto update(const std::uint8_t* data, std::size_t len) -> void { + total_bytes += len; + feed(data, len); } - for (int j = 0; j < 4; ++j) { - result.bytes[j] = static_cast(a0 >> (j * 8)); - result.bytes[j + 4] = static_cast(b0 >> (j * 8)); - result.bytes[j + 8] = static_cast(c0 >> (j * 8)); - result.bytes[j + 12] = static_cast(d0 >> (j * 8)); + auto finalize() -> MD5Hash { + // Pad with 0x80, zeros, then 8-byte little-endian bit length so the + // length lands in the last 8 bytes of a 64-byte block (RFC 1321). + std::uint8_t pad[64]{}; + pad[0] = 0x80; + unsigned pad_count = (buf_len < 56) ? static_cast(56 - buf_len) + : static_cast(120 - buf_len); + feed(pad, pad_count); // padding is not message data — don't count in total_bytes + std::uint64_t bit_len = total_bytes * 8; + std::uint8_t lenbytes[8]; + for (int j = 0; j < 8; ++j) { + lenbytes[j] = static_cast(bit_len >> (j * 8)); + } + feed(lenbytes, 8); + + MD5Hash result; + for (int j = 0; j < 4; ++j) { + result.bytes[j] = static_cast(a0 >> (j * 8)); + result.bytes[j + 4] = static_cast(b0 >> (j * 8)); + result.bytes[j + 8] = static_cast(c0 >> (j * 8)); + result.bytes[j + 12] = static_cast(d0 >> (j * 8)); + } + return result; } - return result; + +private: + // Buffer bytes and process full 64-byte blocks. Does not touch total_bytes, + // so finalize() can feed padding without inflating the recorded length. 
+ auto feed(const std::uint8_t* data, std::size_t len) -> void { + if (buf_len > 0) { + std::size_t take = std::min(64 - buf_len, len); + std::memcpy(buf.data() + buf_len, data, take); + buf_len += static_cast(take); + data += take; + len -= take; + if (buf_len == 64) { + detail::md5_process(a0, b0, c0, d0, buf.data()); + buf_len = 0; + } + } + while (len >= 64) { + detail::md5_process(a0, b0, c0, d0, data); + data += 64; + len -= 64; + } + if (len > 0) { + std::memcpy(buf.data(), data, len); + buf_len = static_cast(len); + } + } +}; + +// One-shot MD5 over a contiguous buffer (kept for compatibility; now O(1) scratch +// via the incremental struct — no padded copy). +inline auto md5(std::string_view data) -> MD5Hash { + MD5 m; + m.update(reinterpret_cast(data.data()), data.size()); + return m.finalize(); } struct SumResult { diff --git a/include/cfbox/error.hpp b/include/cfbox/error.hpp index 18e7b8d..4ea568a 100644 --- a/include/cfbox/error.hpp +++ b/include/cfbox/error.hpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace cfbox::base { @@ -11,11 +10,6 @@ struct Error { std::string msg; }; -struct ErrorView { - int code; - std::string_view msg; -}; - template using Result = std::expected; } // namespace cfbox::base diff --git a/include/cfbox/fs_util.hpp b/include/cfbox/fs_util.hpp index 8c694a3..6a77e43 100644 --- a/include/cfbox/fs_util.hpp +++ b/include/cfbox/fs_util.hpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -259,6 +261,22 @@ inline auto lchown(std::string_view path, uid_t uid, gid_t gid) -> base::Result< return {}; } +// Resolve uid/gid to a name via NSS; fall back to the numeric id when NSS cannot +// resolve it (a statically linked cfbox on a minimal rootfs has no NSS libs, so +// names silently fail — show the number instead of a blank field). Shared by +// ls/stat/id/whoami so the fallback policy is consistent across applets. 
+inline auto owner_name(uid_t uid) -> std::string { + if (auto* pw = ::getpwuid(uid)) + return pw->pw_name; + return std::to_string(uid); +} + +inline auto group_name(gid_t gid) -> std::string { + if (auto* gr = ::getgrgid(gid)) + return gr->gr_name; + return std::to_string(gid); +} + // lstat — link-aware status (does NOT follow symlinks). Archive copy must read // the link itself rather than its target, so it cannot use std::filesystem::status. inline auto lstat(std::string_view path) -> base::Result { diff --git a/include/cfbox/io.hpp b/include/cfbox/io.hpp index 097731a..d7a4d33 100644 --- a/include/cfbox/io.hpp +++ b/include/cfbox/io.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -131,31 +132,48 @@ inline auto write_all(std::string_view path, std::string_view data) -> base::Res } template auto for_each_line(std::FILE* f, Fn&& fn) -> base::Result { - std::string line; - line.reserve(256); - int ch; - while ((ch = std::fgetc(f)) != EOF) { - if (ch == '\n') { - if constexpr (std::is_invocable_r_v) { - if (!fn(line)) - return {}; - } else { - fn(line); - } - line.clear(); + // Deliver a complete line; returns false to stop early (only when fn returns bool). + auto deliver = [&fn](const std::string& line) -> bool { + if constexpr (std::is_invocable_r_v) { + return fn(line); + } else { + fn(line); + return true; + } + }; + + // Block reads + memchr for newlines instead of fgetc-per-byte. Same line + // semantics (split on '\n', a trailing line without '\n' is still delivered, + // ferror checked, bool early-return honored); the buffer is heap-allocated so + // the stack stays tiny even with a large chunk. 
+ constexpr std::size_t kChunk = 65536; + std::vector buf(kChunk); + std::string pending; // partial line carried across block boundaries + std::size_t pos = 0; + std::size_t avail = 0; + + for (;;) { + if (pos == avail) { + avail = std::fread(buf.data(), 1, kChunk, f); + pos = 0; + if (avail == 0) break; + } + if (auto* nl = static_cast(std::memchr(buf.data() + pos, '\n', avail - pos))) { + std::string line; + if (!pending.empty()) line = std::move(pending); + line.append(buf.data() + pos, static_cast(nl - (buf.data() + pos))); + pos = static_cast(nl - buf.data()) + 1; + if (!deliver(line)) return {}; } else { - line += static_cast(ch); + pending.append(buf.data() + pos, avail - pos); + pos = avail; } } if (std::ferror(f)) { return std::unexpected(base::Error{errno, "read error"}); } - if (!line.empty()) { - if constexpr (std::is_invocable_r_v) { - fn(line); - } else { - fn(line); - } + if (!pending.empty()) { + deliver(pending); // trailing line without newline (bool ignored, as before) } return {}; } diff --git a/include/cfbox/stream.hpp b/include/cfbox/stream.hpp index 76958fe..46eeb34 100644 --- a/include/cfbox/stream.hpp +++ b/include/cfbox/stream.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -54,27 +53,4 @@ inline auto split_whitespace(const std::string& line) -> std::vector std::string = 0; - virtual auto finalize() -> void {} -}; - -inline auto run_processor(std::string_view path, LineProcessor& proc) -> int { - auto result = for_each_line(path, [&](const std::string& line, std::size_t num) { - auto output = proc.process_line(line, num); - if (!output.empty()) { - std::fwrite(output.data(), 1, output.size(), stdout); - } - return true; - }); - if (!result) { - std::fprintf(stderr, "cfbox: %s\n", result.error().msg.c_str()); - return 1; - } - proc.finalize(); - return 0; -} - } // namespace cfbox::stream diff --git a/src/applets/awk/awk_executor.cpp b/src/applets/awk/awk_executor.cpp index 2696818..5417d11 100644 --- 
a/src/applets/awk/awk_executor.cpp +++ b/src/applets/awk/awk_executor.cpp @@ -5,6 +5,7 @@ #include #include +#include #include namespace cfbox::awk { @@ -450,9 +451,10 @@ class Executor { } auto process_file(const std::string& path, NodePtr prog) -> void { - FILE* f = std::fopen(path.c_str(), "r"); - if (!f) { CFBOX_ERR("awk", "cannot open '%s'", path.c_str()); return; }; + auto result = cfbox::io::open_file(path, "r"); + if (!result) { CFBOX_ERR("awk", "%s", result.error().msg.c_str()); return; }; st_.filename = path; + FILE* f = result->get(); char line[65536]; while (std::fgets(line, sizeof(line), f)) { std::string rec(line); @@ -460,7 +462,6 @@ class Executor { process_record(rec, prog); if (st_.should_exit) break; } - std::fclose(f); } auto process_stdin(NodePtr prog) -> void { diff --git a/src/applets/cal.cpp b/src/applets/cal.cpp index 0a60a87..3de30fc 100644 --- a/src/applets/cal.cpp +++ b/src/applets/cal.cpp @@ -90,7 +90,12 @@ auto cal_main(int argc, char* argv[]) -> int { const auto& pos = parsed.positional(); if (pos.size() == 1) { - auto val = std::stoi(std::string(pos[0])); + auto val_result = cfbox::args::parse_int(pos[0]); + if (!val_result) { + CFBOX_ERR("cal", "%s", val_result.error().msg.c_str()); + return 2; + } + auto val = *val_result; if (val >= 1 && val <= 12) { month = val; } else { @@ -98,8 +103,18 @@ auto cal_main(int argc, char* argv[]) -> int { month = 1; } } else if (pos.size() >= 2) { - month = std::stoi(std::string(pos[0])); - year = std::stoi(std::string(pos[1])); + auto month_result = cfbox::args::parse_int(pos[0]); + if (!month_result) { + CFBOX_ERR("cal", "%s", month_result.error().msg.c_str()); + return 2; + } + month = *month_result; + auto year_result = cfbox::args::parse_int(pos[1]); + if (!year_result) { + CFBOX_ERR("cal", "%s", year_result.error().msg.c_str()); + return 2; + } + year = *year_result; } bool three = parsed.has('3') || parsed.has_long("three"); diff --git a/src/applets/cmp.cpp b/src/applets/cmp.cpp index 
99e7f1d..9b782fc 100644 --- a/src/applets/cmp.cpp +++ b/src/applets/cmp.cpp @@ -1,6 +1,8 @@ #include #include +#include #include +#include #include #include @@ -30,27 +32,40 @@ auto cmp_main(int argc, char* argv[]) -> int { return 2; } - auto a_result = cfbox::io::read_all(std::string{pos[0]}); - auto b_result = cfbox::io::read_all(std::string{pos[1]}); - if (!a_result) { CFBOX_ERR("cmp", "%s", a_result.error().msg.c_str()); return 2; }; - if (!b_result) { CFBOX_ERR("cmp", "%s", b_result.error().msg.c_str()); return 2; }; - - const auto& a = *a_result; - const auto& b = *b_result; - auto min_len = std::min(a.size(), b.size()); - - for (std::size_t i = 0; i < min_len; ++i) { - if (a[i] != b[i]) { - std::printf("%.*s %.*s differ: byte %zu, line %zu\n", - static_cast(pos[0].size()), pos[0].data(), - static_cast(pos[1].size()), pos[1].data(), - i + 1, static_cast(std::count(a.begin(), a.begin() + static_cast(i), '\n')) + 1); + auto f1 = cfbox::io::open_file(std::string{pos[0]}, "rb"); + if (!f1) { CFBOX_ERR("cmp", "%s", f1.error().msg.c_str()); return 2; } + auto f2 = cfbox::io::open_file(std::string{pos[1]}, "rb"); + if (!f2) { CFBOX_ERR("cmp", "%s", f2.error().msg.c_str()); return 2; } + + // Stream both in lockstep: O(1) memory, and stop at the first differing + // byte instead of reading both files entirely first. 
+ constexpr std::size_t kChunk = 65536; + std::vector buf1(kChunk); + std::vector buf2(kChunk); + std::size_t base = 0; // byte offset of the current block + std::size_t lines = 0; // newlines seen before the current scan position + + for (;;) { + std::size_t n1 = std::fread(buf1.data(), 1, kChunk, f1->get()); + std::size_t n2 = std::fread(buf2.data(), 1, kChunk, f2->get()); + std::size_t n = std::min(n1, n2); + for (std::size_t i = 0; i < n; ++i) { + if (buf1[i] != buf2[i]) { + std::printf("%.*s %.*s differ: byte %zu, line %zu\n", + static_cast(pos[0].size()), pos[0].data(), + static_cast(pos[1].size()), pos[1].data(), + base + i + 1, lines + 1); + return 1; + } + if (buf1[i] == '\n') ++lines; + } + if (n1 != n2) { + // common prefix matched but one file is shorter + std::printf("cmp: EOF on %s\n", n1 < n2 ? pos[0].data() : pos[1].data()); return 1; } - } - if (a.size() != b.size()) { - std::printf("cmp: EOF on %s\n", a.size() < b.size() ? pos[0].data() : pos[1].data()); - return 1; + if (n1 < kChunk) break; // both at EOF, files identical + base += n; } return 0; } diff --git a/src/applets/diff.cpp b/src/applets/diff.cpp index dacbd52..d9d0845 100644 --- a/src/applets/diff.cpp +++ b/src/applets/diff.cpp @@ -165,6 +165,26 @@ static auto build_hunks(const std::vector& edits, std::vector hunks; int hunk_start = std::max(0, change_idx[0] - context); + // Slice edits[start..end] into a Hunk and count its a/b line ranges. 
+ auto finalize_hunk = [&](int start, int end) { + Hunk h; + h.edits.assign(edits.begin() + start, edits.begin() + end + 1); + h.a_start = 1; h.a_count = 0; + h.b_start = 1; h.b_count = 0; + bool a_init = false, b_init = false; + for (auto& e : h.edits) { + if (e.op == ' ' || e.op == '-') { + if (!a_init) { h.a_start = static_cast(e.line) + 1; a_init = true; } + ++h.a_count; + } + if (e.op == ' ' || e.op == '+') { + if (!b_init) { h.b_start = static_cast(e.line) + 1; b_init = true; } + ++h.b_count; + } + } + hunks.push_back(std::move(h)); + }; + for (int ci = 1; ci < static_cast(change_idx.size()); ++ci) { int gap_start = change_idx[static_cast(ci - 1)] + 1; int gap_end = change_idx[static_cast(ci)] - 1; @@ -172,45 +192,14 @@ static auto build_hunks(const std::vector& edits, if (gap_end - gap_start + 1 > 2 * context) { int hunk_end = std::min(static_cast(edits.size()) - 1, change_idx[static_cast(ci - 1)] + context); - Hunk h; - h.edits.assign(edits.begin() + hunk_start, edits.begin() + hunk_end + 1); - // Count a/b lines for this hunk - h.a_start = 1; h.a_count = 0; - h.b_start = 1; h.b_count = 0; - bool a_init = false, b_init = false; - for (auto& e : h.edits) { - if (e.op == ' ' || e.op == '-') { - if (!a_init) { h.a_start = static_cast(e.line) + 1; a_init = true; } - ++h.a_count; - } - if (e.op == ' ' || e.op == '+') { - if (!b_init) { h.b_start = static_cast(e.line) + 1; b_init = true; } - ++h.b_count; - } - } - hunks.push_back(std::move(h)); + finalize_hunk(hunk_start, hunk_end); hunk_start = std::max(0, change_idx[static_cast(ci)] - context); } } // Last hunk int hunk_end = std::min(static_cast(edits.size()) - 1, change_idx.back() + context); - Hunk h; - h.edits.assign(edits.begin() + hunk_start, edits.begin() + hunk_end + 1); - h.a_start = 1; h.a_count = 0; - h.b_start = 1; h.b_count = 0; - bool a_init = false, b_init = false; - for (auto& e : h.edits) { - if (e.op == ' ' || e.op == '-') { - if (!a_init) { h.a_start = static_cast(e.line) + 1; a_init = true; 
} - ++h.a_count; - } - if (e.op == ' ' || e.op == '+') { - if (!b_init) { h.b_start = static_cast(e.line) + 1; b_init = true; } - ++h.b_count; - } - } - hunks.push_back(std::move(h)); + finalize_hunk(hunk_start, hunk_end); return hunks; } diff --git a/src/applets/dmesg.cpp b/src/applets/dmesg.cpp index d66fb62..8d9252e 100644 --- a/src/applets/dmesg.cpp +++ b/src/applets/dmesg.cpp @@ -23,8 +23,12 @@ constexpr cfbox::help::HelpEntry HELP = { auto read_kmsg() -> std::vector { std::vector lines; - cfbox::io::unique_file f(std::fopen("/var/log/dmesg", "r")); - if (!f) f.reset(std::fopen("/var/log/kern.log", "r")); + cfbox::io::unique_file f; + if (auto r = cfbox::io::open_file("/var/log/dmesg", "r")) { + f = std::move(*r); + } else if (auto r2 = cfbox::io::open_file("/var/log/kern.log", "r")) { + f = std::move(*r2); + } if (!f) { CFBOX_ERR("dmesg", "cannot open kernel log"); return lines; diff --git a/src/applets/expand.cpp b/src/applets/expand.cpp index a53c233..f7dbc5a 100644 --- a/src/applets/expand.cpp +++ b/src/applets/expand.cpp @@ -27,7 +27,12 @@ auto expand_main(int argc, char* argv[]) -> int { int tab_stop = 8; if (auto t = parsed.get_any('t', "tabs")) { - tab_stop = std::stoi(std::string{*t}); + auto parsed_tabs = cfbox::args::parse_int(*t); + if (!parsed_tabs) { + CFBOX_ERR("expand", "%s", parsed_tabs.error().msg.c_str()); + return 2; + } + tab_stop = *parsed_tabs; if (tab_stop <= 0) { CFBOX_ERR("expand", "invalid tab stop: %d", tab_stop); return 1; diff --git a/src/applets/fold.cpp b/src/applets/fold.cpp index bcb6c5e..875d2ae 100644 --- a/src/applets/fold.cpp +++ b/src/applets/fold.cpp @@ -29,7 +29,12 @@ auto fold_main(int argc, char* argv[]) -> int { int width = 80; if (auto w = parsed.get_any('w', "width")) { - width = std::stoi(std::string{*w}); + auto parsed_width = cfbox::args::parse_int(*w); + if (!parsed_width) { + CFBOX_ERR("fold", "%s", parsed_width.error().msg.c_str()); + return 2; + } + width = *parsed_width; if (width <= 0) { CFBOX_ERR("fold", 
"invalid width: %d", width); return 1; diff --git a/src/applets/hexdump.cpp b/src/applets/hexdump.cpp index d1dcf2c..7b23c1c 100644 --- a/src/applets/hexdump.cpp +++ b/src/applets/hexdump.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace { @@ -65,8 +66,22 @@ auto hexdump_main(int argc, char* argv[]) -> int { bool canonical = parsed.has('C') || parsed.has_long("canonical"); std::size_t max_bytes = 0; std::uint64_t skip_bytes = 0; - if (auto v = parsed.get('n')) max_bytes = static_cast(std::stoull(std::string(*v))); - if (auto v = parsed.get('s')) skip_bytes = std::stoull(std::string(*v)); + if (auto v = parsed.get('n')) { + auto parsed_n = cfbox::args::parse_int(*v); + if (!parsed_n || *parsed_n < 0) { // a negative count would wrap to a huge std::size_t + CFBOX_ERR("hexdump", "%s", parsed_n ? "length must not be negative" : parsed_n.error().msg.c_str()); + return 2; + } + max_bytes = static_cast<std::size_t>(*parsed_n); + } + if (auto v = parsed.get('s')) { + auto parsed_s = cfbox::args::parse_int(*v); + if (!parsed_s || *parsed_s < 0) { // a negative offset would wrap to a huge std::uint64_t + CFBOX_ERR("hexdump", "%s", parsed_s ? "offset must not be negative" : parsed_s.error().msg.c_str()); + return 2; + } + skip_bytes = static_cast<std::uint64_t>(*parsed_s); + } const auto& pos = parsed.positional(); std::string filename = pos.empty() ? "" : std::string(pos[0]); @@ -114,12 +129,11 @@ auto hexdump_main(int argc, char* argv[]) -> int { return do_dump(stdin); } - std::FILE* f = std::fopen(filename.c_str(), "rb"); - if (!f) { + auto file_result = cfbox::io::open_file(filename, "rb"); + if (!file_result) { CFBOX_ERR("hexdump", "cannot open %s", filename.c_str()); return 1; } - auto rc = do_dump(f); - std::fclose(f); - return rc; + auto f = std::move(*file_result); + return do_dump(f.get()); } diff --git a/src/applets/id.cpp b/src/applets/id.cpp index 7804809..6cba295 100644 --- a/src/applets/id.cpp +++ b/src/applets/id.cpp @@ -1,11 +1,10 @@ #include -#include -#include #include #include #include #include +#include #include namespace { @@ -44,24 +43,14 @@ auto id_main(int argc, char* argv[]) -> int { uid_t uid = opt_r ? getuid() : geteuid(); gid_t gid = opt_r ? 
getgid() : getegid(); - auto get_username = [](uid_t id) -> std::string { - auto* pw = getpwuid(id); - return pw ? std::string{pw->pw_name} : std::to_string(static_cast(id)); - }; - - auto get_groupname = [](gid_t id) -> std::string { - auto* gr = getgrgid(id); - return gr ? std::string{gr->gr_name} : std::to_string(static_cast(id)); - }; - if (opt_u) { - if (opt_n) std::puts(get_username(uid).c_str()); + if (opt_n) std::puts(cfbox::fs::owner_name(uid).c_str()); else std::printf("%u\n", static_cast(uid)); return 0; } if (opt_g) { - if (opt_n) std::puts(get_groupname(gid).c_str()); + if (opt_n) std::puts(cfbox::fs::group_name(gid).c_str()); else std::printf("%u\n", static_cast(gid)); return 0; } @@ -79,7 +68,7 @@ auto id_main(int argc, char* argv[]) -> int { if (!first) std::fputc(' ', stdout); first = false; if (opt_n) { - std::fputs(get_groupname(g).c_str(), stdout); + std::fputs(cfbox::fs::group_name(g).c_str(), stdout); } else { std::printf("%u", static_cast(g)); } @@ -95,8 +84,8 @@ auto id_main(int argc, char* argv[]) -> int { // Default: print full format std::printf("uid=%u(%s) gid=%u(%s)", - static_cast(uid), get_username(uid).c_str(), - static_cast(gid), get_groupname(gid).c_str()); + static_cast(uid), cfbox::fs::owner_name(uid).c_str(), + static_cast(gid), cfbox::fs::group_name(gid).c_str()); std::vector groups; int ngroups = getgroups(0, nullptr); @@ -109,7 +98,7 @@ auto id_main(int argc, char* argv[]) -> int { for (auto g : groups) { if (!first) std::fputc(',', stdout); first = false; - std::printf("%u(%s)", static_cast(g), get_groupname(g).c_str()); + std::printf("%u(%s)", static_cast(g), cfbox::fs::group_name(g).c_str()); } } std::fputc('\n', stdout); diff --git a/src/applets/init/init_inittab.cpp b/src/applets/init/init_inittab.cpp index 9bc3561..b5d6433 100644 --- a/src/applets/init/init_inittab.cpp +++ b/src/applets/init/init_inittab.cpp @@ -1,5 +1,7 @@ #include "init.hpp" +#include + namespace cfbox::init { auto parse_inittab_line(std::string_view 
line) -> InittabEntry { @@ -31,13 +33,12 @@ auto parse_inittab_line(std::string_view line) -> InittabEntry { auto parse_inittab(const std::string& path) -> std::vector { std::vector entries; - FILE* f = std::fopen(path.c_str(), "r"); - if (!f) return entries; - char buf[1024]; - while (std::fgets(buf, sizeof(buf), f)) { - std::string_view line(buf); - // Remove newline + // Stream the inittab line by line via RAII-backed io; an open failure matches + // the original silent fallback (empty result). for_each_line strips the + // trailing '\n'; we still trim a stray '\r' before parsing. + auto result = cfbox::io::for_each_line(path, [&](const std::string& raw) { + std::string_view line(raw); while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) line = line.substr(0, line.size() - 1); @@ -45,9 +46,9 @@ auto parse_inittab(const std::string& path) -> std::vector { if (!entry.action.empty() && !entry.process.empty()) { entries.push_back(std::move(entry)); } - } + }); + (void)result; - std::fclose(f); return entries; } diff --git a/src/applets/init/init_main.cpp b/src/applets/init/init_main.cpp index 005fafb..d4ffb61 100644 --- a/src/applets/init/init_main.cpp +++ b/src/applets/init/init_main.cpp @@ -8,6 +8,7 @@ #include #include +#include namespace cfbox::init { @@ -85,8 +86,8 @@ auto init_main(int argc, char* argv[]) -> int { cfbox::init::InitState state; state.is_pid1 = (getpid() == 1); - // Check if /etc/inittab exists - FILE* inittab = std::fopen("/etc/inittab", "r"); + // Check if /etc/inittab exists (open_file probes readability; RAII closes on drop) + auto inittab = cfbox::io::open_file("/etc/inittab", "r"); if (!inittab) { // Fallback: QEMU smoke test mode (preserves CI compatibility) if (state.is_pid1) { @@ -102,7 +103,6 @@ auto init_main(int argc, char* argv[]) -> int { } return 0; } - std::fclose(inittab); // Full init mode state.entries = cfbox::init::parse_inittab("/etc/inittab"); diff --git a/src/applets/iostat.cpp b/src/applets/iostat.cpp 
index 6853d13..303d651 100644 --- a/src/applets/iostat.cpp +++ b/src/applets/iostat.cpp @@ -79,7 +79,14 @@ auto iostat_main(int argc, char* argv[]) -> int { int count = 1; double delay = 1.0; - if (auto v = parsed.get('c')) count = std::stoi(std::string(*v)); + if (auto v = parsed.get('c')) { + auto parsed_count = cfbox::args::parse_int(std::string(*v)); + if (!parsed_count) { + CFBOX_ERR("iostat", "%s", parsed_count.error().msg.c_str()); + return 2; + } + count = *parsed_count; + } if (auto v = parsed.get('d')) delay = std::stod(std::string(*v)); auto first = cfbox::proc::read_diskstats(); diff --git a/src/applets/ls.cpp b/src/applets/ls.cpp index cd21dc3..013bd0c 100644 --- a/src/applets/ls.cpp +++ b/src/applets/ls.cpp @@ -78,21 +78,6 @@ auto format_time(std::filesystem::file_time_type ftime) -> std::string { return buf; } -// Resolve uid/gid to a name; fall back to the numeric id when NSS cannot -// resolve it (a statically linked cfbox on a minimal rootfs has no NSS libs, -// so names silently fail — show the number instead of a blank field). 
-auto owner_of(uid_t uid) -> std::string { - if (auto* pw = getpwuid(uid)) - return pw->pw_name; - return std::to_string(uid); -} - -auto group_of(gid_t gid) -> std::string { - if (auto* gr = getgrgid(gid)) - return gr->gr_name; - return std::to_string(gid); -} - enum class ColorMode { Never, Auto, Always }; struct LsOptions { @@ -181,8 +166,8 @@ auto print_entry(const std::string& path, const LsOptions& opts, bool use_color) std::string group = "?"; struct stat lst {}; if (::lstat(path.c_str(), &lst) == 0) { - owner = owner_of(lst.st_uid); - group = group_of(lst.st_gid); + owner = cfbox::fs::owner_name(lst.st_uid); + group = cfbox::fs::group_name(lst.st_gid); } if (type == std::filesystem::file_type::symlink) { diff --git a/src/applets/md5sum.cpp b/src/applets/md5sum.cpp index f53bb6f..4509951 100644 --- a/src/applets/md5sum.cpp +++ b/src/applets/md5sum.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include #include @@ -27,16 +29,37 @@ auto md5sum_main(int argc, char* argv[]) -> int { const auto& pos = parsed.positional(); auto paths = pos.empty() ? std::vector{"-"} : pos; + constexpr std::size_t kChunk = 65536; int rc = 0; for (auto p : paths) { - auto data_result = (p == "-") ? 
cfbox::io::read_all_stdin() : cfbox::io::read_all(p); - if (!data_result) { - CFBOX_ERR("md5sum", "%s", data_result.error().msg.c_str()); + cfbox::checksum::MD5 md5; + std::vector buf(kChunk); + bool ok = false; + if (p == "-") { + ok = true; + while (std::size_t n = std::fread(buf.data(),1, kChunk, stdin)) { + md5.update(buf.data(), n); + } + if (std::ferror(stdin)) ok = false; + } else { + auto fh = cfbox::io::open_file(p, "rb"); + if (!fh) { + CFBOX_ERR("md5sum", "%s", fh.error().msg.c_str()); + rc = 1; + continue; + } + ok = true; + while (std::size_t n = std::fread(buf.data(),1, kChunk, fh->get())) { + md5.update(buf.data(), n); + } + if (std::ferror(fh->get())) ok = false; + } + if (!ok) { + CFBOX_ERR("md5sum", "%.*s: read error", static_cast(p.size()), p.data()); rc = 1; continue; } - auto hash = cfbox::checksum::md5(*data_result); - auto hex = cfbox::checksum::md5_to_hex(hash); + auto hex = cfbox::checksum::md5_to_hex(md5.finalize()); std::printf("%s ", hex.c_str()); if (p == "-") { std::puts("-"); diff --git a/src/applets/mdev.cpp b/src/applets/mdev.cpp index d250e9a..8e610ed 100644 --- a/src/applets/mdev.cpp +++ b/src/applets/mdev.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -33,13 +34,12 @@ struct SysDevice { // Read ":" from /dev; absent or unparseable => nullopt. 
auto read_dev(const std::string& dir) -> std::optional> { - FILE* f = std::fopen((dir + "/dev").c_str(), "r"); + auto f = cfbox::io::open_file(dir + "/dev", "r"); if (!f) - return std::nullopt; + return std::nullopt; // absent dev file is normal for many sysfs entries int maj = -1; int min = -1; - int n = std::fscanf(f, "%d:%d", &maj, &min); - std::fclose(f); + int n = std::fscanf(f->get(), "%d:%d", &maj, &min); if (n != 2 || maj < 0) return std::nullopt; return std::make_pair(maj, min); diff --git a/src/applets/more.cpp b/src/applets/more.cpp index 96a716d..8e5c85b 100644 --- a/src/applets/more.cpp +++ b/src/applets/more.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -51,19 +52,20 @@ auto more_main(int argc, char* argv[]) -> int { const auto& pos = parsed.positional(); std::string filename = pos.empty() ? "" : std::string(pos[0]); - std::FILE* f = stdin; + std::vector lines; if (!filename.empty() && filename != "-") { - f = std::fopen(filename.c_str(), "r"); - if (!f) { - CFBOX_ERR("more", "cannot open %s", filename.c_str()); + auto opened = cfbox::io::open_file(filename, "r"); + if (!opened) { + CFBOX_ERR("more", "%s", opened.error().msg.c_str()); return 1; } + // RAII unique_file keeps the FILE* alive across read_lines(), then + // auto-closes on scope exit (replaces the old manual fclose). + lines = read_lines(opened.value().get()); + } else { + lines = read_lines(stdin); } - auto lines = read_lines(f); - if (f != stdin) - std::fclose(f); - if (lines.empty()) return 0; diff --git a/src/applets/mount.cpp b/src/applets/mount.cpp index ea0bc13..494fe17 100644 --- a/src/applets/mount.cpp +++ b/src/applets/mount.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace { @@ -87,12 +88,12 @@ auto parse_mount_options(std::string_view opts) -> MountOpts { // fields beyond options (dump/pass) are ignored. 
auto parse_fstab(const std::string& path) -> std::vector { std::vector entries; - FILE* f = std::fopen(path.c_str(), "r"); - if (!f) + auto fh = cfbox::io::open_file(path, "r"); + if (!fh) return entries; char buf[512]; - while (std::fgets(buf, sizeof(buf), f)) { + while (std::fgets(buf, sizeof(buf), fh->get())) { std::string_view line(buf); while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) line.remove_suffix(1); @@ -135,7 +136,6 @@ auto parse_fstab(const std::string& path) -> std::vector { entries.push_back(std::move(e)); } } - std::fclose(f); return entries; } diff --git a/src/applets/nice.cpp b/src/applets/nice.cpp index bbd4c52..82f94d4 100644 --- a/src/applets/nice.cpp +++ b/src/applets/nice.cpp @@ -30,7 +30,12 @@ auto nice_main(int argc, char* argv[]) -> int { int adjustment = 10; if (auto n = parsed.get_any('n', "adjustment")) { - adjustment = std::stoi(std::string{*n}); + auto parsed_adj = cfbox::args::parse_int(*n); + if (!parsed_adj) { + CFBOX_ERR("nice", "%s", parsed_adj.error().msg.c_str()); + return 2; + } + adjustment = *parsed_adj; } const auto& pos = parsed.positional(); diff --git a/src/applets/pgrep.cpp b/src/applets/pgrep.cpp index 31eef06..e094bf3 100644 --- a/src/applets/pgrep.cpp +++ b/src/applets/pgrep.cpp @@ -58,11 +58,27 @@ auto pgrep_main(int argc, char* argv[]) -> int { bool list_names = parsed.has('l') || parsed.has_long("list"); bool exact = parsed.has('x') || parsed.has_long("exact"); + const char* cmd = is_pkill ? 
"pkill" : "pgrep"; + pid_t filter_ppid = -1; - if (auto v = parsed.get('P')) filter_ppid = static_cast(std::stoi(std::string(*v))); + if (auto v = parsed.get('P')) { + auto ppid = cfbox::args::parse_int(*v); + if (!ppid) { + CFBOX_ERR_V(cmd, "%s", ppid.error().msg.c_str()); + return 2; + } + filter_ppid = static_cast(*ppid); + } uid_t filter_uid = static_cast(-1); - if (auto v = parsed.get('u')) filter_uid = static_cast(std::stoi(std::string(*v))); + if (auto v = parsed.get('u')) { + auto uid = cfbox::args::parse_int(*v); + if (!uid) { + CFBOX_ERR_V(cmd, "%s", uid.error().msg.c_str()); + return 2; + } + filter_uid = static_cast(*uid); + } int sig = SIGTERM; if (auto v = parsed.get('s')) { @@ -74,19 +90,26 @@ auto pgrep_main(int argc, char* argv[]) -> int { else if (sname == "TERM") sig = SIGTERM; else if (sname == "USR1") sig = SIGUSR1; else if (sname == "USR2") sig = SIGUSR2; - else sig = std::stoi(std::string(*v)); + else { + auto num = cfbox::args::parse_int(*v); + if (!num) { + CFBOX_ERR_V(cmd, "%s", num.error().msg.c_str()); + return 2; + } + sig = *num; + } } const auto& pos = parsed.positional(); if (pos.empty()) { - CFBOX_ERR_V(is_pkill ? "pkill" : "pgrep", "no pattern specified"); + CFBOX_ERR_V(cmd, "no pattern specified"); return 1; } const auto& pattern = pos[0]; auto result = cfbox::proc::read_all_processes(); if (!result) { - CFBOX_ERR_V(is_pkill ? 
"pkill" : "pgrep", "%s", result.error().msg.c_str()); + CFBOX_ERR_V(cmd, "%s", result.error().msg.c_str()); return 1; } diff --git a/src/applets/pmap.cpp b/src/applets/pmap.cpp index 2585a62..0afe575 100644 --- a/src/applets/pmap.cpp +++ b/src/applets/pmap.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -34,12 +35,12 @@ struct MapEntry { auto parse_maps(pid_t pid) -> std::vector { auto path = "/proc/" + std::to_string(pid) + "/maps"; - auto* f = std::fopen(path.c_str(), "r"); - if (!f) return {}; + auto opened = cfbox::io::open_file(path, "r"); + if (!opened) return {}; std::vector entries; char line[1024]; - while (std::fgets(line, sizeof(line), f)) { + while (std::fgets(line, sizeof(line), opened->get())) { auto len = std::strlen(line); while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { line[--len] = '\0'; @@ -89,7 +90,6 @@ auto parse_maps(pid_t pid) -> std::vector { entries.push_back(std::move(e)); } - std::fclose(f); // Calculate end_address from next entry for (size_t i = 0; i + 1 < entries.size(); ++i) { diff --git a/src/applets/renice.cpp b/src/applets/renice.cpp index 6757a5f..a57d484 100644 --- a/src/applets/renice.cpp +++ b/src/applets/renice.cpp @@ -36,7 +36,14 @@ auto renice_main(int argc, char* argv[]) -> int { if (parsed.has_long("version")) { cfbox::help::print_version(HELP); return 0; } int increment = 1; - if (auto v = parsed.get('n')) increment = std::stoi(std::string(*v)); + if (auto v = parsed.get('n')) { + auto parsed_inc = cfbox::args::parse_int(*v); + if (!parsed_inc) { + CFBOX_ERR("renice", "%s", parsed_inc.error().msg.c_str()); + return 2; + } + increment = *parsed_inc; + } const auto& args = parsed.positional(); if (args.empty()) { @@ -46,7 +53,12 @@ auto renice_main(int argc, char* argv[]) -> int { int rc = 0; for (const auto& id_str : args) { - auto id = static_cast(std::stoi(std::string(id_str))); + auto parsed_id = cfbox::args::parse_int(id_str); + if (!parsed_id) { + CFBOX_ERR("renice", 
"%s", parsed_id.error().msg.c_str()); + return 2; + } + auto id = static_cast(*parsed_id); int which = PRIO_PROCESS; if (parsed.has('g') || parsed.has_long("pgrp")) which = PRIO_PGRP; diff --git a/src/applets/rev.cpp b/src/applets/rev.cpp index 03fc998..46b9636 100644 --- a/src/applets/rev.cpp +++ b/src/applets/rev.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace { @@ -50,14 +51,13 @@ auto rev_main(int argc, char* argv[]) -> int { if (fn == "-") { process_stream(stdin); } else { - auto* f = std::fopen(fn.c_str(), "r"); - if (!f) { + auto opened = cfbox::io::open_file(fn, "r"); + if (!opened) { CFBOX_ERR("rev", "cannot open %s", fn.c_str()); rc = 1; continue; } - process_stream(f); - std::fclose(f); + process_stream(opened->get()); } } return rc; diff --git a/src/applets/sed.cpp b/src/applets/sed.cpp index ad65e21..0ebef0d 100644 --- a/src/applets/sed.cpp +++ b/src/applets/sed.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -40,6 +41,7 @@ struct SedCommand { enum Action { Substitute, Delete, Print } action = Substitute; std::string pattern; std::string replacement; + std::unique_ptr compiled_re; // precompiled; null = invalid pattern bool global = false; // g flag bool print_flag = false; // p flag bool delete_flag = false; // d flag (for substitute context) @@ -127,6 +129,15 @@ auto parse_substitute(std::string_view s) -> SedCommand { } cmd.pattern = pattern; + // Precompile the regex once per command instead of on every matching line + // (apply_substitute used to build a fresh scoped_regex each call). scoped_regex + // is non-movable, so hold it via unique_ptr; null means invalid pattern (no-op, + // matching the old behavior of returning false on compile failure). 
+ cmd.compiled_re = std::make_unique(); + if (cmd.compiled_re->compile(cmd.pattern.c_str(), REG_EXTENDED) != 0) { + cmd.compiled_re.reset(); + } + // Extract replacement std::string replacement; for (std::size_t i = 0; i < s.size(); ++i) { @@ -183,6 +194,7 @@ auto parse_command(std::string_view script) -> SedCommand { cmd.action = SedCommand::Substitute; cmd.pattern = sub.pattern; cmd.replacement = sub.replacement; + cmd.compiled_re = std::move(sub.compiled_re); // precompiled regex (parse_substitute) cmd.global = sub.global; cmd.print_flag = sub.print_flag; cmd.delete_flag = sub.delete_flag; @@ -227,8 +239,8 @@ auto address_matches(const Address& addr, std::size_t line, std::size_t total_li } auto apply_substitute(std::string& line, const SedCommand& cmd) -> bool { - cfbox::util::scoped_regex re; - if (re.compile(cmd.pattern.c_str(), REG_EXTENDED) != 0) return false; + if (!cmd.compiled_re) return false; // invalid pattern (precompiled at parse time) + auto& re = *cmd.compiled_re; regmatch_t m; if (re.exec(line.c_str(), 1, &m, 0) != 0) return false; diff --git a/src/applets/sh/sh_builtins.cpp b/src/applets/sh/sh_builtins.cpp index 179119e..270178f 100644 --- a/src/applets/sh/sh_builtins.cpp +++ b/src/applets/sh/sh_builtins.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace cfbox::sh { @@ -213,20 +214,14 @@ static int builtin_source(std::vector& args, ShellState& state) { return 2; } - auto* fp = std::fopen(args[1].c_str(), "r"); - if (!fp) { - CFBOX_ERR("sh", "%s: %s", args[1].c_str(), std::strerror(errno)); + auto script_result = cfbox::io::read_all(args[1]); + if (!script_result) { + CFBOX_ERR("sh", "%s: %s", args[1].c_str(), + std::strerror(script_result.error().code)); return 1; } - std::string script; - char buf[4096]; - while (auto n = std::fread(buf, 1, sizeof(buf), fp)) { - script.append(buf, n); - } - std::fclose(fp); - - Lexer lexer(script); + Lexer lexer(*script_result); Parser parser(lexer); auto ast = parser.parse_program(); if (ast) 
return execute(*ast, state); diff --git a/src/applets/sh/sh_main.cpp b/src/applets/sh/sh_main.cpp index 92238ad..41db82b 100644 --- a/src/applets/sh/sh_main.cpp +++ b/src/applets/sh/sh_main.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include +#include namespace { @@ -43,18 +43,12 @@ auto run_string(const std::string& script, cfbox::sh::ShellState& state) -> int } auto run_file(const char* path, cfbox::sh::ShellState& state) -> int { - auto* fp = std::fopen(path, "r"); - if (!fp) { - CFBOX_ERR("sh", "%s: %s", path, std::strerror(errno)); + auto result = cfbox::io::read_all(path); + if (!result) { + CFBOX_ERR("sh", "%s: %s", path, result.error().msg.c_str()); return 127; } - std::string script; - char buf[4096]; - while (auto n = std::fread(buf, 1, sizeof(buf), fp)) { - script.append(buf, n); - } - std::fclose(fp); - return run_string(script, state); + return run_string(*result, state); } // Force the controlling tty into canonical mode so the kernel handles line diff --git a/src/applets/shuf.cpp b/src/applets/shuf.cpp index 918fede..c320545 100644 --- a/src/applets/shuf.cpp +++ b/src/applets/shuf.cpp @@ -34,7 +34,12 @@ auto shuf_main(int argc, char* argv[]) -> int { int max_count = -1; if (auto n = parsed.get_any('n', "head-count")) { - max_count = std::stoi(std::string{*n}); + auto parsed_count = cfbox::args::parse_int(*n); + if (!parsed_count) { + CFBOX_ERR("shuf", "%s", parsed_count.error().msg.c_str()); + return 2; + } + max_count = *parsed_count; } bool echo_mode = parsed.has('e'); diff --git a/src/applets/split.cpp b/src/applets/split.cpp index 6156d93..a20aaaa 100644 --- a/src/applets/split.cpp +++ b/src/applets/split.cpp @@ -1,6 +1,8 @@ +#include #include #include #include +#include #include #include @@ -31,6 +33,19 @@ static auto next_digit_suffix(int n) -> std::string { std::snprintf(buf, sizeof(buf), "%02d", n); return buf; } + +// Parse s as base-10 long with no throw. 
-l/-b historically went through std::stol +// (long range), so we keep long semantics here rather than cfbox::args::parse_int, +// which is int-bounded and would reject values > INT_MAX (e.g. split -b on multi-GB +// inputs). Matches parse_int's error shape so CFBOX_ERR output stays identical. +static auto parse_long(std::string_view s) -> cfbox::base::Result { + long v = 0; + auto res = std::from_chars(s.data(), s.data() + s.size(), v); + if (res.ec != std::errc{} || res.ptr != s.data() + s.size()) { + return std::unexpected(cfbox::base::Error{EINVAL, "not a valid integer: '" + std::string{s} + '\''}); + } + return v; +} } // namespace auto split_main(int argc, char* argv[]) -> int { @@ -46,10 +61,20 @@ auto split_main(int argc, char* argv[]) -> int { long lines = 1000; long bytes = 0; if (auto l = parsed.get_any('l', "lines")) { - lines = std::stol(std::string{*l}); + auto parsed_lines = parse_long(*l); + if (!parsed_lines) { + CFBOX_ERR("split", "%s", parsed_lines.error().msg.c_str()); + return 2; + } + lines = *parsed_lines; } if (auto b = parsed.get_any('b', "bytes")) { - bytes = std::stol(std::string{*b}); + auto parsed_bytes = parse_long(*b); + if (!parsed_bytes) { + CFBOX_ERR("split", "%s", parsed_bytes.error().msg.c_str()); + return 2; + } + bytes = *parsed_bytes; } bool numeric = parsed.has('d'); diff --git a/src/applets/stat.cpp b/src/applets/stat.cpp index 0f42dd7..b744d7a 100644 --- a/src/applets/stat.cpp +++ b/src/applets/stat.cpp @@ -3,11 +3,10 @@ #include #include #include -#include -#include #include #include +#include #include #include @@ -119,13 +118,11 @@ auto stat_main(int argc, char* argv[]) -> int { case 'f': std::printf("%lu", static_cast(st.st_blocks * 512)); break; case 'F': std::fputs(file_type_string(st.st_mode), stdout); break; case 'U': { - auto* pw = getpwuid(st.st_uid); - std::fputs(pw ? 
pw->pw_name : std::to_string(st.st_uid).c_str(), stdout); + std::fputs(cfbox::fs::owner_name(st.st_uid).c_str(), stdout); break; } case 'G': { - auto* gr = getgrgid(st.st_gid); - std::fputs(gr ? gr->gr_name : std::to_string(st.st_gid).c_str(), stdout); + std::fputs(cfbox::fs::group_name(st.st_gid).c_str(), stdout); break; } case 'a': std::printf("%o", st.st_mode & 07777); break; @@ -152,13 +149,11 @@ auto stat_main(int argc, char* argv[]) -> int { static_cast(st.st_blocks), static_cast(st.st_blksize), file_type_string(st.st_mode)); - auto* pw = getpwuid(st.st_uid); - auto* gr = getgrgid(st.st_gid); std::printf("Access: (%04o/%s) Uid: (%5u/%-8s) Gid: (%5u/%-8s)\n", st.st_mode & 07777u, format_perms(st.st_mode).c_str(), - st.st_uid, pw ? pw->pw_name : "", - st.st_gid, gr ? gr->gr_name : ""); + st.st_uid, cfbox::fs::owner_name(st.st_uid).c_str(), + st.st_gid, cfbox::fs::group_name(st.st_gid).c_str()); #if defined(__linux__) std::printf("Modify: %s\n", format_time(st.st_mtim).c_str()); std::printf("Change: %s\n", format_time(st.st_ctim).c_str()); diff --git a/src/applets/swapoff.cpp b/src/applets/swapoff.cpp index b5e37d7..a8eaa6b 100644 --- a/src/applets/swapoff.cpp +++ b/src/applets/swapoff.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -44,12 +45,14 @@ auto swapoff_main(int argc, char* argv[]) -> int { } if (parsed.has('a')) { - FILE* f = std::fopen("/proc/swaps", "r"); - if (!f) - return 0; // no swap configured — nothing to do + // open_file via RAII unique_file (auto-fclose); open failure stays silent + // (no swap configured — nothing to do), matching the prior fopen behavior. 
+ auto opened = cfbox::io::open_file("/proc/swaps", "r"); + if (!opened) + return 0; + FILE* f = opened.value().get(); char line[512]; if (!std::fgets(line, sizeof(line), f)) { // skip header; empty swaps file is fine - std::fclose(f); return 0; } int rc = 0; @@ -62,7 +65,6 @@ auto swapoff_main(int argc, char* argv[]) -> int { rc = 1; } } - std::fclose(f); return rc; } diff --git a/src/applets/sysctl.cpp b/src/applets/sysctl.cpp index 8c4e4f0..8d734b2 100644 --- a/src/applets/sysctl.cpp +++ b/src/applets/sysctl.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace { @@ -46,26 +47,28 @@ auto path_to_key(std::string_view path) -> std::string { } auto read_sysctl_value(const std::string& path) -> std::string { - auto* f = std::fopen(path.c_str(), "r"); - if (!f) return {}; - char buf[4096]; - if (!std::fgets(buf, sizeof(buf), f)) { - std::fclose(f); - return {}; + // /proc/sys/* values are single-line; emulate the original fgets-first-line + // behavior via read_all (handles /proc 0-size fallback) then take up to the + // first newline, trimming trailing CR/LF. Open errors / empty reads map to + // the original empty-string "skip" result. + auto content = cfbox::io::read_all(path); + if (!content || content->empty()) return {}; + auto end = content->find('\n'); + auto first = (end == std::string::npos) ? 
*content : content->substr(0, end); + auto len = first.size(); + while (len > 0 && (first[len - 1] == '\n' || first[len - 1] == '\r')) { + first[--len] = '\0'; } - std::fclose(f); - auto len = std::strlen(buf); - while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) { - buf[--len] = '\0'; - } - return buf; + return first; } auto write_sysctl_value(const std::string& path, std::string_view value) -> bool { - auto* f = std::fopen(path.c_str(), "w"); + auto f = cfbox::io::open_file(path, "w"); if (!f) return false; - std::fprintf(f, "%.*s\n", static_cast(value.size()), value.data()); - return std::fclose(f) == 0; + std::fprintf(f->get(), "%.*s\n", static_cast(value.size()), value.data()); + // Explicit close to capture the flush/write status (fclose==0) the original + // returned; RAII would otherwise close silently on destruction. + return std::fclose(f->release()) == 0; } auto show_key(std::string_view key, bool no_name) -> bool { @@ -90,25 +93,26 @@ auto show_all(bool no_name) -> void { } auto load_file(const std::string& filepath, bool no_name) -> int { - auto* f = std::fopen(filepath.c_str(), "r"); + auto f = cfbox::io::open_file(filepath, "r"); if (!f) { CFBOX_ERR("sysctl", "cannot open %s", filepath.c_str()); return 1; } int errors = 0; - char line[4096]; - while (std::fgets(line, sizeof(line), f)) { - auto len = std::strlen(line); - while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { - line[--len] = '\0'; + auto result = cfbox::io::for_each_line(f->get(), [&](const std::string& line) { + // for_each_line strips the trailing '\n'; also drop a trailing '\r' + // (CRLF) to match the original fgets-based trimming. 
+ std::string trimmed = line; + while (!trimmed.empty() && (trimmed.back() == '\n' || trimmed.back() == '\r')) { + trimmed.pop_back(); } + auto len = trimmed.size(); // Skip comments and empty lines - if (len == 0 || line[0] == '#' || line[0] == ';') continue; - auto* eq = std::strchr(line, '='); - if (!eq) continue; - *eq = '\0'; - std::string key(line); + if (len == 0 || trimmed[0] == '#' || trimmed[0] == ';') return; + auto* eq = std::strchr(trimmed.c_str(), '='); + if (!eq) return; + std::string key(trimmed.c_str(), eq); std::string val(eq + 1); // Trim whitespace while (!key.empty() && (key.back() == ' ' || key.back() == '\t')) key.pop_back(); @@ -121,8 +125,8 @@ auto load_file(const std::string& filepath, bool no_name) -> int { } else if (!no_name) { std::printf("%s = %s\n", key.c_str(), val.c_str()); } - } - std::fclose(f); + }); + (void)result; // line-level I/O errors leave the per-line error count as-is return errors > 0 ? 1 : 0; } diff --git a/src/applets/tar.cpp b/src/applets/tar.cpp index 36b0682..315292a 100644 --- a/src/applets/tar.cpp +++ b/src/applets/tar.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -116,42 +117,58 @@ auto tar_main(int argc, char* argv[]) -> int { if (create) { if (!dir.empty()) std::filesystem::current_path(dir); - std::string archive_data; - auto targets = pos.empty() ? std::vector{"."} : pos; + // Stream the archive straight to its destination instead of buffering the + // whole thing in one string (which used to scale with total archive size). + std::FILE* out = stdout; + cfbox::io::unique_file outfile; + if (archive != "-") { + auto oh = cfbox::io::open_file(archive, "wb"); + if (!oh) { + CFBOX_ERR("tar", "%s", oh.error().msg.c_str()); + return 1; + } + outfile = std::move(*oh); + out = outfile.get(); + } + auto targets = pos.empty() ? 
std::vector{"."} : pos; std::vector> files; for (auto t : targets) { collect_files(std::filesystem::path{t}, files); } + constexpr std::size_t kChunk = 65536; + std::vector buf(kChunk); + char pad[512]{}; + for (auto& [relpath, fullpath] : files) { if (std::filesystem::is_directory(fullpath)) { auto hdr = create_header(relpath, 0, '5'); - archive_data.append(reinterpret_cast(&hdr), 512); + std::fwrite(&hdr, 1, sizeof(hdr), out); continue; } - auto data = cfbox::io::read_all(fullpath.string()); - if (!data) { - CFBOX_ERR("tar", "%s: %s", relpath.c_str(), data.error().msg.c_str()); + std::error_code ec; + auto sz = std::filesystem::file_size(fullpath, ec); + if (ec) { + CFBOX_ERR("tar", "%s: %s", relpath.c_str(), ec.message().c_str()); continue; } - auto hdr = create_header(relpath, data->size(), '0'); - archive_data.append(reinterpret_cast(&hdr), 512); - archive_data.append(*data); - auto rem = data->size() % 512; - if (rem > 0) archive_data.append(512 - rem, '\0'); - } - archive_data.append(1024, '\0'); - - if (archive == "-") { - std::fwrite(archive_data.data(), 1, archive_data.size(), stdout); - } else { - auto wresult = cfbox::io::write_all(archive, archive_data); - if (!wresult) { - CFBOX_ERR("tar", "%s", wresult.error().msg.c_str()); - return 1; + auto fh = cfbox::io::open_file(fullpath.string(), "rb"); + if (!fh) { + CFBOX_ERR("tar", "%s: %s", relpath.c_str(), fh.error().msg.c_str()); + continue; + } + auto hdr = create_header(relpath, static_cast(sz), '0'); + std::fwrite(&hdr, 1, sizeof(hdr), out); + std::size_t n; + while ((n = std::fread(buf.data(), 1, kChunk, fh->get())) != 0) { + std::fwrite(buf.data(), 1, n, out); } + auto rem = static_cast(sz % 512); + if (rem > 0) std::fwrite(pad, 1, 512 - rem, out); } + std::fwrite(pad, 1, 512, out); // two zero blocks mark end of archive + std::fwrite(pad, 1, 512, out); return 0; } diff --git a/src/applets/tee.cpp b/src/applets/tee.cpp index a36994c..b2cd2f5 100644 --- a/src/applets/tee.cpp +++ b/src/applets/tee.cpp 
@@ -32,11 +32,11 @@ auto tee_main(int argc, char* argv[]) -> int { std::vector files; for (auto p : pos) { - auto* f = std::fopen(std::string{p}.c_str(), append ? "ab" : "wb"); - if (!f) { - CFBOX_ERR("tee", "%s: %s", std::string{p}.c_str(), std::strerror(errno)); + auto fres = cfbox::io::open_file(p, append ? "ab" : "wb"); + if (!fres) { + CFBOX_ERR("tee", "%s: %s", std::string{p}.c_str(), std::strerror(fres.error().code)); } else { - files.emplace_back(f); + files.emplace_back(std::move(*fres)); } } diff --git a/src/applets/top/top_main.cpp b/src/applets/top/top_main.cpp index 99c1861..f9f5b9d 100644 --- a/src/applets/top/top_main.cpp +++ b/src/applets/top/top_main.cpp @@ -248,8 +248,22 @@ auto top_main(int argc, char* argv[]) -> int { int delay = 3; bool batch = parsed.has('b') || parsed.has_long("batch"); int iterations = 0; - if (auto v = parsed.get('d')) delay = std::stoi(std::string(*v)); - if (auto v = parsed.get('n')) iterations = std::stoi(std::string(*v)); + if (auto v = parsed.get('d')) { + auto parsed_delay = cfbox::args::parse_int(*v); + if (!parsed_delay) { + CFBOX_ERR("top", "%s", parsed_delay.error().msg.c_str()); + return 2; + } + delay = *parsed_delay; + } + if (auto v = parsed.get('n')) { + auto parsed_iters = cfbox::args::parse_int(*v); + if (!parsed_iters) { + CFBOX_ERR("top", "%s", parsed_iters.error().msg.c_str()); + return 2; + } + iterations = *parsed_iters; + } if (delay < 1) delay = 1; if (batch) { diff --git a/src/applets/watch.cpp b/src/applets/watch.cpp index b4d7401..2c11b3e 100644 --- a/src/applets/watch.cpp +++ b/src/applets/watch.cpp @@ -83,7 +83,14 @@ auto watch_main(int argc, char* argv[]) -> int { if (parsed.has_long("version")) { cfbox::help::print_version(HELP); return 0; } int interval = 2; - if (auto v = parsed.get('n')) interval = std::stoi(std::string(*v)); + if (auto v = parsed.get('n')) { + auto parsed_interval = cfbox::args::parse_int(*v); + if (!parsed_interval) { + CFBOX_ERR("watch", "%s", 
parsed_interval.error().msg.c_str()); + return 2; + } + interval = *parsed_interval; + } if (interval < 1) interval = 1; const auto& pos = parsed.positional(); diff --git a/src/applets/whoami.cpp b/src/applets/whoami.cpp index 84f2daa..8b9a3f0 100644 --- a/src/applets/whoami.cpp +++ b/src/applets/whoami.cpp @@ -1,9 +1,9 @@ #include -#include #include #include #include +#include #include namespace { @@ -23,11 +23,6 @@ auto whoami_main(int argc, char* argv[]) -> int { if (parsed.has_long("help")) { cfbox::help::print_help(HELP); return 0; } if (parsed.has_long("version")) { cfbox::help::print_version(HELP); return 0; } - auto* pw = getpwuid(geteuid()); - if (pw) { - std::puts(pw->pw_name); - } else { - std::printf("%u\n", static_cast(geteuid())); - } + std::puts(cfbox::fs::owner_name(geteuid()).c_str()); return 0; } diff --git a/src/applets/xargs.cpp b/src/applets/xargs.cpp index e75318f..5da1915 100644 --- a/src/applets/xargs.cpp +++ b/src/applets/xargs.cpp @@ -40,7 +40,12 @@ auto xargs_main(int argc, char* argv[]) -> int { int max_args = 0; if (auto n = parsed.get_any('n', "max-args")) { - max_args = std::stoi(std::string{*n}); + auto parsed_n = cfbox::args::parse_int(*n); + if (!parsed_n) { + CFBOX_ERR("xargs", "%s", parsed_n.error().msg.c_str()); + return 2; + } + max_args = *parsed_n; } std::string replace_str; diff --git a/tests/benchmark/bench_io_lines.cpp b/tests/benchmark/bench_io_lines.cpp new file mode 100644 index 0000000..ba7f891 --- /dev/null +++ b/tests/benchmark/bench_io_lines.cpp @@ -0,0 +1,44 @@ +// Micro-benchmark: io::for_each_line on a large file (the shared line-reader +// used by grep/cut/expand/fold/nl/paste/tac/shuf/tsort). Isolates the reader. +#include + +#include +#include +#include +#include + +#include + +static auto big_lines_file() -> const char* { + static char path[] = "/tmp/cfbox_bench_lines_XXXXXX"; + static bool made = false; + if (!made) { + int fd = mkstemp(path); + std::FILE* f = (fd >= 0) ? 
fdopen(fd, "w") : nullptr;
+        if (f) {
+            for (unsigned i = 0; i < 200000u; ++i) {  // 200k lines
+                std::fprintf(f, "line number %u has some words to read\n", i);
+            }
+            std::fclose(f);
+        }
+        made = true;
+    }
+    return path;
+}
+
+static void BenchForEachLine(benchmark::State& state) {  // per iteration: open fixture, scan every line, close
+    const char* f = big_lines_file();
+    for (auto _ : state) {
+        std::FILE* fp = std::fopen(f, "r");
+        if (!fp) { state.SkipWithError("cannot open line-reader fixture"); break; }  // fail loudly instead of timing failed fopen() calls
+        std::size_t count = 0;
+        auto result = cfbox::io::for_each_line(fp, [&count](const std::string&) {
+            ++count;
+            return true;
+        });
+        std::fclose(fp);
+        benchmark::DoNotOptimize(result);  // keep the reader call and the counter observable to the optimizer
+        benchmark::DoNotOptimize(count);
+    }
+}
+BENCHMARK(BenchForEachLine)->Unit(benchmark::kMillisecond)->UseRealTime();
diff --git a/tests/benchmark/bench_sed.cpp b/tests/benchmark/bench_sed.cpp
new file mode 100644
index 0000000..3030475
--- /dev/null
+++ b/tests/benchmark/bench_sed.cpp
@@ -0,0 +1,53 @@
+// Micro-benchmark: sed 's/foo/QUUX/g' on a big input.
+// Targets the per-line regex recompile hot path (see document/ai/PERFORMANCE.md, batch 1 / 批1).
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#if CFBOX_ENABLE_SED
+
+static auto big_sed_file() -> const char* {
+    static char path[] = "/tmp/cfbox_bench_sed_XXXXXX";
+    static bool made = false;
+    if (!made) {
+        int fd = mkstemp(path);
+        std::FILE* f = (fd >= 0) ? fdopen(fd, "w") : nullptr;
+        if (f) {
+            for (unsigned i = 0; i < 50000u; ++i) {
+                std::fprintf(f, "line %u foo bar baz\n", i);
+            }
+            std::fclose(f);
+        }
+        made = true;
+    }
+    return path;
+}
+
+// sed 's/foo/QUUX/g' on 50k lines. Before batch 1 each line recompiles the regex
+// (apply_substitute builds a fresh scoped_regex); after batch 1 it is compiled once
+// at parse time. Same output either way (verified by test_sed + test_sed.sh).
+static void BenchSedSubstitute(benchmark::State& state) {
+    const char* f = big_sed_file();
+    int saved_stdout = dup(STDOUT_FILENO);  // kept so the benchmark report can reach the real stdout afterwards
+    int devnull = open("/dev/null", O_WRONLY);
+    char a0[] = "sed", a1[] = "s/foo/QUUX/g";
+    char* argv[] = {a0, a1, const_cast<char*>(f), nullptr};  // argc stays 3; argv[argc] is nullptr, as for a real main()
+    for (auto _ : state) {
+        dup2(devnull, STDOUT_FILENO);
+        int rc = sed_main(3, argv);
+        benchmark::DoNotOptimize(rc);
+    }
+    std::fflush(stdout);  // drain sed's buffered output while fd 1 still points at /dev/null
+    if (saved_stdout >= 0) dup2(saved_stdout, STDOUT_FILENO);
+    if (devnull >= 0) close(devnull);
+    if (saved_stdout >= 0) close(saved_stdout);
+}
+BENCHMARK(BenchSedSubstitute)->Unit(benchmark::kMillisecond)->UseRealTime();
+
+#endif  // CFBOX_ENABLE_SED
diff --git a/tests/benchmark/bench_sort.cpp b/tests/benchmark/bench_sort.cpp
new file mode 100644
index 0000000..917fcfd
--- /dev/null
+++ b/tests/benchmark/bench_sort.cpp
@@ -0,0 +1,63 @@
+// Micro-benchmark harness — see document/ai/PERFORMANCE.md.
+// Build: cmake -B build-bench -DCMAKE_BUILD_TYPE=Release -DCFBOX_ENABLE_BENCHMARK=ON
+//        cmake --build build-bench -j$(nproc)
+// Run:   ./build-bench/cfbox_bench
+//
+// This first bench just validates the harness and gives a sort baseline; later
+// batches add benches per hot path (sed regex, md5sum streaming, cmp, ...).
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#if CFBOX_ENABLE_SORT
+
+// Lazily create one big pseudo-random input (~50k lines) and reuse it across
+// iterations, so the benchmark measures sort itself, not fixture generation.
+static auto big_sort_file() -> const char* {
+    static char path[] = "/tmp/cfbox_bench_sort_XXXXXX";
+    static bool made = false;
+    if (!made) {
+        int fd = mkstemp(path);
+        std::FILE* f = (fd >= 0) ? fdopen(fd, "w") : nullptr;
+        if (f) {
+            for (unsigned i = 0; i < 50000u; ++i) {
+                std::fprintf(f, "%u\n", (i * 2654435761u) & 0xffffu);
+            }
+            std::fclose(f);
+        }
+        made = true;
+    }
+    return path;
+}
+
+// Benchmarks cfbox sort on the big input.
Output is discarded to /dev/null so
+// printing is never the bottleneck. Re-reads the file each iteration, which
+// matches real sort usage (sort is not a pure in-memory transform).
+static void BenchSort(benchmark::State& state) {
+    const char* f = big_sort_file();
+    // Discard sort's stdout inside the loop (so printing isn't the bottleneck),
+    // but save/restore so google-benchmark's own report still reaches stdout.
+    int saved_stdout = dup(STDOUT_FILENO);
+    int devnull = open("/dev/null", O_WRONLY);
+    char arg0[] = "sort";
+    char* argv[] = {arg0, const_cast<char*>(f), nullptr};  // argc stays 2; argv[argc] is nullptr, as for a real main()
+    for (auto _ : state) {
+        dup2(devnull, STDOUT_FILENO);
+        int rc = sort_main(2, argv);
+        benchmark::DoNotOptimize(rc);
+    }
+    std::fflush(stdout);  // drain sort's buffered output while fd 1 still points at /dev/null
+    if (saved_stdout >= 0) dup2(saved_stdout, STDOUT_FILENO);
+    if (devnull >= 0) close(devnull);
+    if (saved_stdout >= 0) close(saved_stdout);
+}
+BENCHMARK(BenchSort)->Unit(benchmark::kMillisecond)->UseRealTime();
+
+#endif  // CFBOX_ENABLE_SORT
diff --git a/tests/benchmark/run_bench_e2e.sh b/tests/benchmark/run_bench_e2e.sh
new file mode 100755
index 0000000..f5b5d32
--- /dev/null
+++ b/tests/benchmark/run_bench_e2e.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# End-to-end wall-clock: compares cfbox against the system coreutils on large inputs.
+# Complements the in-process micro-benchmarks (cfbox_bench): this one measures the wall-clock a real user sees.
+# Advisory signal only (benchmark noise is real, CI machines vary widely): read it as a signal, not a hard gate. See document/ai/PERFORMANCE.md.
+set -u
+
+script_dir="$(cd "$(dirname "$0")" && pwd)"
+repo="$(cd "$script_dir/../.." && pwd)"
+# Prefer the Release-built cfbox (fair timing); fall back to the Debug build if it is missing.
+CFBOX="${CFBOX:-$repo/build-bench/cfbox}"
+[[ -x "$CFBOX" ]] || CFBOX="$repo/build/cfbox"
+if [[ !
-x "$CFBOX" ]]; then echo "ERROR: 找不到 cfbox 二进制(设 CFBOX 或先构建)" >&2; exit 2; fi
+
+export LC_ALL=C
+tmp="$(mktemp -d)"; trap 'rm -rf "$tmp"' EXIT
+seq 1 200000 | awk '{print "line "$1" foo bar baz"}' > "$tmp/big" # 200k lines
+head -c 50000000 /dev/urandom > "$tmp/big50" # 50 MB of binary data
+cp "$tmp/big50" "$tmp/big50b"
+
+# Run a command once and print its elapsed time in milliseconds (relies on GNU date's %N nanoseconds).
+ms() { local s e; s=$(date +%s%N); "$@" >/dev/null 2>&1; e=$(date +%s%N); echo $(( (e - s) / 1000000 )); }
+# Median of 3 runs.
+med3() { local a b c; a=$(ms "$@"); b=$(ms "$@"); c=$(ms "$@"); printf '%s\n' "$a" "$b" "$c" | sort -n | sed -n 2p; }
+# run APPLET ARGS...: time cfbox APPLET against the system APPLET found on PATH (type -P bypasses aliases); skips the applet when no system tool exists.
+run() {
+  local applet=$1; shift
+  local ora; ora=$(type -P "$applet") || { printf ' %-8s skipped (no system %s on PATH)\n' "$applet" "$applet"; return 0; }
+  local cf or
+  cf=$(med3 "$CFBOX" "$applet" "$@")
+  or=$(med3 "$ora" "$@")
+  printf ' %-8s cfbox=%6sms core=%6sms ratio=%5.2fx\n' "$applet" "$cf" "$or" "$(awk -v c="$cf" -v o="$or" 'BEGIN{print (o>0)? c/o : 0}')"
+}
+
+echo "cfbox : $CFBOX"
+echo "input : $(wc -l < "$tmp/big") 行文本 + 50MB 二进制"
+echo "=== 文本处理(grep/wc/sort/cut)==="
+run grep foo "$tmp/big"
+run grep -c foo "$tmp/big"
+run wc "$tmp/big"
+run sort "$tmp/big"
+run cut -c1-10 "$tmp/big"
+echo "=== 大文件二进制(md5sum/cmp)==="
+run md5sum "$tmp/big50"
+run cmp "$tmp/big50" "$tmp/big50b"
+echo "(ratio < 1.0 = cfbox 比 coreutils 快;> 1.0 = 慢)"
diff --git a/tests/check_structure_gates.sh b/tests/check_structure_gates.sh
new file mode 100755
index 0000000..bf640ff
--- /dev/null
+++ b/tests/check_structure_gates.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# tests/check_structure_gates.sh — mechanical structure gates (see document/ai/STRUCTURE-TASTE.md §2).
+# Hard gates (CI exits non-zero):
+#   Gate 1 banned-pattern — matches actual calls (name plus opening parenthesis) so comments/identifiers are not flagged (e.g. awk's sprintf function name).
+#     a) unsafe C functions sprintf(/strcpy(/strcat(/gets( — use snprintf/std::string/string_view.
+#     b) std::stoi(/std::stol( — the project builds with -fno-exceptions, so invalid input throws → terminate; use cfbox::args::parse_int.
+#     c) raw std::fopen inside applets — use cfbox::io::open_file (io.hpp itself is the permitted wrapper layer and is not under src/applets).
+#   Gate 2 layering — applets must not hand-roll recursive directory traversal; use cfbox::fs::for_each_entry or add an exemption comment.
+#     Currently 5 grandfathered files: their needs (depth/filter/per-entry-error/relative paths) exceed what for_each_entry offers.
+set -u
+
+script_dir="$(cd "$(dirname "$0")" && pwd)"
+repo="$(cd "$script_dir/.." && pwd)"
+cd "$repo"
+
+fail=0
+report() { sed 's/^/ /'; }  # prefixes every piped match line so it sits under its summary line
+
+echo "=== 门 1:banned-pattern ==="
+
+n=$(grep -rnE '\b(sprintf|strcpy|strcat|gets)\(' src/ include/ 2>/dev/null | wc -l | tr -d ' ')
+echo " 不安全 C 函数调用(sprintf/strcpy/strcat/gets): $n"
+if (( n > 0 )); then grep -rnE '\b(sprintf|strcpy|strcat|gets)\(' src/ include/ 2>/dev/null | report; fail=1; fi
+
+n=$(grep -rnE 'std::sto[il]\(' src/ 2>/dev/null | wc -l | tr -d ' ')
+echo " std::stoi/stol 调用: $n"
+if (( n > 0 )); then grep -rnE 'std::sto[il]\(' src/ 2>/dev/null | report; fail=1; fi
+
+n=$(grep -rn 'std::fopen' src/applets/ 2>/dev/null | wc -l | tr -d ' ')
+echo " applets 裸 std::fopen: $n"
+if (( n > 0 )); then grep -rn 'std::fopen' src/applets/ 2>/dev/null | report; fail=1; fi
+
+echo "=== 门 2:layering(applets 不自造递归遍历)==="
+
+# Grandfathered: legitimate use cases that for_each_entry cannot cover.
+allow='src/applets/du.cpp src/applets/find.cpp src/applets/tar.cpp src/applets/sysctl.cpp src/applets/grep.cpp'
+new_layer=0
+while IFS= read -r f; do
+  [[ -z "$f" ]] && continue
+  ok=0
+  for g in $allow; do [[ "$f" == "$g" ]] && ok=1 && break; done
+  if (( !
ok )); then + echo " FAIL: $f 直接用 recursive_directory_iterator —— 用 cfbox::fs::for_each_entry 或加注释豁免" + new_layer=$((new_layer + 1)) + fail=1 + fi +done < <(grep -rln 'recursive_directory_iterator' src/applets/ 2>/dev/null) +echo " grandfather(允许): $(echo $allow | wc -w | tr -d ' ') 个;新增裸奔: $new_layer" + +echo "===" +if (( fail )); then echo "结构机械护栏:FAIL"; exit 1; fi +echo "结构机械护栏:PASS" +exit 0 diff --git a/tests/expected_counts.txt b/tests/expected_counts.txt index 57d867a..f064264 100644 --- a/tests/expected_counts.txt +++ b/tests/expected_counts.txt @@ -2,6 +2,7 @@ # 由 tests/check_test_floor.sh 读取校验。详见 document/ai/COVERAGE.md。 # GTest 数取自 `ctest -N` 的 "Total Tests:" 行;集成脚本数取自 tests/integration/test_*.sh。 # 2026-06-28 首次钉死真值 436+54(之前文档在 436/399、56/54 之间漂移); -# 同日 sort -rn 修复时新增 SortTest.ReverseNumericStableOnTies → 437: -gtest 437 +# 同日 sort -rn 修复时新增 SortTest.ReverseNumericStableOnTies → 437; +# 结构维度批1 删死抽象 ErrorView(及其 ErrorViewFields 测试)→ 回到 436: +gtest 436 integration 54 diff --git a/tests/unit/test_error.cpp b/tests/unit/test_error.cpp index f64ec80..d7269c4 100644 --- a/tests/unit/test_error.cpp +++ b/tests/unit/test_error.cpp @@ -38,15 +38,6 @@ TEST(ErrorTest, ErrorMove) { EXPECT_EQ(b.msg, "moved"); } -// ── ErrorView struct ───────────────────────────────────────── - -TEST(ErrorTest, ErrorViewFields) { - constexpr int code = 4; - ErrorView ev{code, "view msg"}; - EXPECT_EQ(ev.code, 4); - EXPECT_EQ(ev.msg, "view msg"); -} - // ── Result ok path ──────────────────────────────────────── TEST(ErrorTest, ResultOk) {