From a3b89edae8256d2948bdfcbda54ec010321be0e7 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:25:46 +0800 Subject: [PATCH 01/21] feat(cp): add -a archive mode preserving attrs and symlinks - new -a/--archive: -r + preserve mode/owner/time, copy symlinks as links - link-aware lstat-driven tree walk never follows symlinks (TOCTOU-safe) - fix -p preserving only mode: now also sets owner + atime/mtime - add fs::lstat and fs::set_times (utimensat, AT_SYMLINK_NOFOLLOW) - directory attrs applied last so child writes don't reset dir mtime - fs_util.hpp: include <vector> (was relying on transitive include) --- include/cfbox/fs_util.hpp | 24 ++++ src/applets/cp.cpp | 205 +++++++++++++++++++++++------------ tests/integration/test_cp.sh | 22 ++++ tests/unit/test_cp.cpp | 131 +++++++++++++++++++++- 4 files changed, 312 insertions(+), 70 deletions(-) diff --git a/include/cfbox/fs_util.hpp b/include/cfbox/fs_util.hpp index 68cef56..8c694a3 100644 --- a/include/cfbox/fs_util.hpp +++ b/include/cfbox/fs_util.hpp @@ -3,11 +3,14 @@ #include #include #include +#include #include #include #include +#include #include #include +#include #include @@ -256,6 +259,27 @@ inline auto lchown(std::string_view path, uid_t uid, gid_t gid) -> base::Result< return {}; } +// lstat — link-aware status (does NOT follow symlinks). Archive copy must read +// the link itself rather than its target, so it cannot use std::filesystem::status. +inline auto lstat(std::string_view path) -> base::Result { + struct stat st{}; + if (::lstat(std::string{path}.c_str(), &st) != 0) { + return std::unexpected(base::Error{errno, std::strerror(errno)}); + } + return st; +} + +// Set atime/mtime via utimensat. no_follow applies AT_SYMLINK_NOFOLLOW so a +// symlink's own timestamps are set without dereferencing its target (utime(2) +// cannot do this). times[0]=atime, times[1]=mtime. 
+inline auto set_times(std::string_view path, const struct timespec times[2], bool no_follow) -> base::Result { + int flags = no_follow ? AT_SYMLINK_NOFOLLOW : 0; + if (::utimensat(AT_FDCWD, std::string{path}.c_str(), times, flags) != 0) { + return std::unexpected(base::Error{errno, std::strerror(errno)}); + } + return {}; +} + template void for_each_entry(std::string_view path, bool recursive, Func&& fn) { if (recursive && is_directory(path)) { diff --git a/src/applets/cp.cpp b/src/applets/cp.cpp index aaeafda..1057691 100644 --- a/src/applets/cp.cpp +++ b/src/applets/cp.cpp @@ -1,12 +1,15 @@ +#include + #include +#include #include #include #include +#include #include #include #include -#include namespace { @@ -15,28 +18,117 @@ constexpr cfbox::help::HelpEntry HELP = { .version = CFBOX_VERSION_STRING, .one_line = "copy files and directories", .usage = "cp [OPTIONS] SOURCE... DEST", - .options = " -r copy directories recursively\n" - " -p preserve mode, ownership, and timestamps", + .options = " -r, --recursive copy directories recursively\n" + " -a, --archive archive mode: -r + preserve mode/owner/time, copy symlinks as links\n" + " -p, --preserve preserve mode, ownership, and timestamps", .extra = "", }; -auto copy_preserve(const std::string& src, const std::string& dst) -> int { - // Copy the file first - auto copy_result = cfbox::fs::copy_file(src, dst); - if (!copy_result) { - CFBOX_ERR("cp", "%s", copy_result.error().msg.c_str()); +// Copy ownership + atime/mtime + mode from src to dst. Failures are non-fatal: +// ownership preservation needs privileges (EPERM for non-root), and a missing +// attribute must not abort a multi-file copy (matches coreutils -p behavior). +auto preserve_attrs(const std::string& src, const std::string& dst, bool is_symlink) -> void { + auto st = cfbox::fs::lstat(src); + if (!st) { + return; + } + + // Ownership: lchown for symlinks (no dereference), chown otherwise. + auto owner = is_symlink ? 
cfbox::fs::lchown(dst, st->st_uid, st->st_gid) + : cfbox::fs::chown(dst, st->st_uid, st->st_gid); + (void)owner; // non-fatal + + // Timestamps: symlinks need AT_SYMLINK_NOFOLLOW so we don't touch the target. + struct timespec times[2]; + times[0] = st->st_atim; + times[1] = st->st_mtim; + auto tr = cfbox::fs::set_times(dst, times, is_symlink); + (void)tr; // non-fatal + + // Mode is meaningless on a symlink itself; only set it for regular files/dirs. + if (!is_symlink) { + auto pr = cfbox::fs::permissions(dst, static_cast(st->st_mode & 0777)); + (void)pr; // non-fatal + } +} + +// Copy a single filesystem entry in archive mode, link-aware (never follows +// symlinks). Returns 0 on success, 1 on a content-copy failure. Attribute +// failures are reported non-fatally inside preserve_attrs. +auto copy_one_archive(const std::string& src, const std::string& dst) -> int { + auto st = cfbox::fs::lstat(src); + if (!st) { + CFBOX_ERR("cp", "cannot stat '%s': %s", src.c_str(), st.error().msg.c_str()); return 1; } - // Preserve permissions - auto status_result = cfbox::fs::status(src); - if (status_result) { - auto perm_result = cfbox::fs::permissions(dst, status_result->permissions()); - if (!perm_result) { - // non-fatal + if (S_ISLNK(st->st_mode)) { + auto target = cfbox::fs::read_symlink(src); + if (!target) { + CFBOX_ERR("cp", "cannot read symlink '%s': %s", src.c_str(), target.error().msg.c_str()); + return 1; + } + // Overwrite an existing destination entry so re-runs are idempotent. 
+ if (cfbox::fs::exists(dst)) { + auto rm = cfbox::fs::remove_all(dst); + (void)rm; } + auto cr = cfbox::fs::create_symlink(*target, dst); + if (!cr) { + CFBOX_ERR("cp", "cannot create symlink '%s': %s", dst.c_str(), cr.error().msg.c_str()); + return 1; + } + preserve_attrs(src, dst, /*is_symlink=*/true); + return 0; } + if (S_ISDIR(st->st_mode)) { + // Create with default (writable) perms first; the source mode is applied + // after children are copied so a read-only source dir never blocks writes. + std::error_code ec; + std::filesystem::create_directory(std::filesystem::path{dst}, ec); + if (ec) { + CFBOX_ERR("cp", "cannot create directory '%s': %s", dst.c_str(), ec.message().c_str()); + return 1; + } + + int rc = 0; + auto entries = cfbox::fs::directory_entries(src); + if (!entries) { + CFBOX_ERR("cp", "cannot read directory '%s': %s", src.c_str(), entries.error().msg.c_str()); + return 1; + } + for (const auto& e : *entries) { + auto name = e.path().filename(); + std::string child_src = (std::filesystem::path{src} / name).string(); + std::string child_dst = (std::filesystem::path{dst} / name).string(); + if (copy_one_archive(child_src, child_dst) != 0) { + rc = 1; + } + } + // Apply mode/owner/time last so child creation doesn't reset the dir mtime. + preserve_attrs(src, dst, /*is_symlink=*/false); + return rc; + } + + // Regular file (device/fifo/socket fall through to copy_file best-effort). + auto cr = cfbox::fs::copy_file(src, dst); + if (!cr) { + CFBOX_ERR("cp", "cannot copy '%s' to '%s': %s", src.c_str(), dst.c_str(), cr.error().msg.c_str()); + return 1; + } + preserve_attrs(src, dst, /*is_symlink=*/false); + return 0; +} + +// Legacy single-file preserve copy (-p without -a): content + mode/owner/time. 
+auto copy_preserve(const std::string& src, const std::string& dst) -> int { + auto copy_result = cfbox::fs::copy_file(src, dst); + if (!copy_result) { + CFBOX_ERR("cp", "%s", copy_result.error().msg.c_str()); + return 1; + } + preserve_attrs(src, dst, /*is_symlink=*/false); return 0; } @@ -46,13 +138,15 @@ auto cp_main(int argc, char* argv[]) -> int { auto parsed = cfbox::args::parse(argc, argv, { cfbox::args::OptSpec{'r', false, "recursive"}, cfbox::args::OptSpec{'p', false, "preserve"}, + cfbox::args::OptSpec{'a', false, "archive"}, }); if (parsed.has_long("help")) { cfbox::help::print_help(HELP); return 0; } if (parsed.has_long("version")) { cfbox::help::print_version(HELP); return 0; } - bool recursive = parsed.has('r'); - bool preserve = parsed.has('p'); + const bool archive = parsed.has('a'); + const bool recursive = parsed.has('r') || archive; + const bool preserve = parsed.has('p'); const auto& pos = parsed.positional(); if (pos.size() < 2) { @@ -60,81 +154,54 @@ auto cp_main(int argc, char* argv[]) -> int { return 1; } - // Last argument is the destination std::string dst{pos.back()}; int rc = 0; - if (pos.size() == 2) { - // Single source - std::string src{pos[0]}; + auto copy_source = [&](const std::string& src) { + std::string dest = dst; + if (cfbox::fs::is_directory(dst)) { + std::filesystem::path src_path{src}; + dest = (std::filesystem::path{dst} / src_path.filename()).string(); + } + + if (archive) { + // -a copies anything (files, dirs, symlinks) without following links. 
+ if (copy_one_archive(src, dest) != 0) rc = 1; + return; + } if (cfbox::fs::is_directory(src)) { if (!recursive) { CFBOX_ERR("cp", "-r not specified; omitting directory '%s'", src.c_str()); - return 1; - } - // Determine destination path - std::string dest = dst; - if (cfbox::fs::is_directory(dst)) { - // Copy into directory - std::filesystem::path src_path{src}; - dest = (std::filesystem::path{dst} / src_path.filename()).string(); + rc = 1; + return; } auto result = cfbox::fs::copy_recursive(src, dest); if (!result) { CFBOX_ERR("cp", "cannot copy '%s' to '%s': %s", src.c_str(), dest.c_str(), result.error().msg.c_str()); rc = 1; } + } else if (preserve) { + if (copy_preserve(src, dest) != 0) rc = 1; } else { - std::string dest = dst; - if (cfbox::fs::is_directory(dst)) { - std::filesystem::path src_path{src}; - dest = (std::filesystem::path{dst} / src_path.filename()).string(); - } - if (preserve) { - rc = copy_preserve(src, dest); - } else { - auto result = cfbox::fs::copy_file(src, dest); - if (!result) { - CFBOX_ERR("cp", "cannot copy '%s' to '%s': %s", src.c_str(), dest.c_str(), result.error().msg.c_str()); - rc = 1; - } + auto result = cfbox::fs::copy_file(src, dest); + if (!result) { + CFBOX_ERR("cp", "cannot copy '%s' to '%s': %s", src.c_str(), dest.c_str(), result.error().msg.c_str()); + rc = 1; } } + }; + + if (pos.size() == 2) { + copy_source(std::string{pos[0]}); } else { - // Multiple sources — destination must be a directory + // Multiple sources — destination must be a directory. 
if (!cfbox::fs::is_directory(dst)) { CFBOX_ERR("cp", "target '%s' is not a directory", dst.c_str()); return 1; } - for (std::size_t i = 0; i < pos.size() - 1; ++i) { - std::string src{pos[i]}; - std::filesystem::path src_path{src}; - std::string dest = (std::filesystem::path{dst} / src_path.filename()).string(); - - if (cfbox::fs::is_directory(src)) { - if (!recursive) { - CFBOX_ERR("cp", "-r not specified; omitting directory '%s'", src.c_str()); - rc = 1; - continue; - } - auto result = cfbox::fs::copy_recursive(src, dest); - if (!result) { - CFBOX_ERR("cp", "cannot copy '%s' to '%s': %s", src.c_str(), dest.c_str(), result.error().msg.c_str()); - rc = 1; - } - } else { - if (preserve) { - if (copy_preserve(src, dest) != 0) rc = 1; - } else { - auto result = cfbox::fs::copy_file(src, dest); - if (!result) { - CFBOX_ERR("cp", "cannot copy '%s' to '%s': %s", src.c_str(), dest.c_str(), result.error().msg.c_str()); - rc = 1; - } - } - } + copy_source(std::string{pos[i]}); } } diff --git a/tests/integration/test_cp.sh b/tests/integration/test_cp.sh index 019e005..ee116f8 100755 --- a/tests/integration/test_cp.sh +++ b/tests/integration/test_cp.sh @@ -55,6 +55,28 @@ mkdir -p "$tmpdir/multi_dst" run_test "multi_into_dir" 0 "$tmpdir/m1.txt" "$tmpdir/m2.txt" "$tmpdir/multi_dst" [[ -f "$tmpdir/multi_dst/m1.txt" && -f "$tmpdir/multi_dst/m2.txt" ]] && ((++pass)) || { echo "FAIL [cp multi]"; ((++fail)); } +# cp -a: preserve mode/time + copy symlinks as links (never follow) +mkdir -p "$tmpdir/archsrc/sub" +echo "data" > "$tmpdir/archsrc/file.txt" +echo "inner" > "$tmpdir/archsrc/sub/inner.txt" +ln -s file.txt "$tmpdir/archsrc/link.txt" +ln -s /nonexistent "$tmpdir/archsrc/broken" +chmod 0640 "$tmpdir/archsrc/file.txt" +run_test "archive" 0 -a "$tmpdir/archsrc" "$tmpdir/archdst" + +[[ -f "$tmpdir/archdst/file.txt" && -f "$tmpdir/archdst/sub/inner.txt" ]] && ((++pass)) || { echo "FAIL [cp -a structure]"; ((++fail)); } + +src_mode=$(stat -c '%a' "$tmpdir/archsrc/file.txt") 
+dst_mode=$(stat -c '%a' "$tmpdir/archdst/file.txt") +[[ "$src_mode" == "$dst_mode" ]] && ((++pass)) || { echo "FAIL [cp -a mode $src_mode!=$dst_mode]"; ((++fail)); } + +[[ -L "$tmpdir/archdst/link.txt" ]] && ((++pass)) || { echo "FAIL [cp -a symlink not a link]"; ((++fail)); } +src_link=$(readlink "$tmpdir/archsrc/link.txt") +dst_link=$(readlink "$tmpdir/archdst/link.txt") +[[ "$src_link" == "$dst_link" ]] && ((++pass)) || { echo "FAIL [cp -a link target $src_link!=$dst_link]"; ((++fail)); } + +[[ -L "$tmpdir/archdst/broken" ]] && ((++pass)) || { echo "FAIL [cp -a broken symlink]"; ((++fail)); } + # cp no operand run_test "no_operand" 1 diff --git a/tests/unit/test_cp.cpp b/tests/unit/test_cp.cpp index b43d8b1..367d25d 100644 --- a/tests/unit/test_cp.cpp +++ b/tests/unit/test_cp.cpp @@ -1,7 +1,13 @@ +#include + +#include +#include + +#include #include +#include #include #include "test_capture.hpp" -#include #if CFBOX_ENABLE_CP @@ -67,4 +73,127 @@ TEST(CpTest, SourceNotExist) { EXPECT_NE(cp_main(3, argv), 0); } +// --- attribute-preservation helpers (raw stat avoids file_time_type glue) --- +namespace { +auto set_mtime(const std::string& path, time_t t) -> void { + struct timespec times[2] = {{t, 0}, {t, 0}}; + auto r = cfbox::fs::set_times(path, times, /*no_follow=*/false); + (void)r; +} +auto mode_of(const std::string& path) -> unsigned { + struct stat st{}; + ::stat(path.c_str(), &st); + return st.st_mode & 0777; +} +auto mtime_of(const std::string& path) -> time_t { + struct stat st{}; + ::stat(path.c_str(), &st); + return st.st_mtime; +} +} // namespace + +TEST(CpTest, ArchiveCopyPreservesMode) { + TempDir tmp; + auto src = tmp.write_file("src.txt", "data"); + std::filesystem::permissions(src, std::filesystem::perms::owner_read | std::filesystem::perms::owner_exec); + auto dst = (tmp.path / "dst.txt").string(); + char a0[] = "cp", a1[] = "-a", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", src.c_str()); + std::snprintf(a3, sizeof(a3), "%s", 
dst.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + EXPECT_EQ(mode_of(src), mode_of(dst)); +} + +TEST(CpTest, ArchiveCopyPreservesTimestamp) { + TempDir tmp; + auto src = tmp.write_file("src.txt", "data"); + set_mtime(src, 1'000'000'000); // fixed old time, distinct from "now" + auto dst = (tmp.path / "dst.txt").string(); + char a0[] = "cp", a1[] = "-a", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", src.c_str()); + std::snprintf(a3, sizeof(a3), "%s", dst.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + EXPECT_EQ(mtime_of(src), mtime_of(dst)); +} + +TEST(CpTest, ArchiveCopySymlinkAsLink) { + TempDir tmp; + auto target = tmp.write_file("target.txt", "x"); + auto link = (tmp.path / "link").string(); + std::filesystem::create_symlink(target, link); + auto dstlink = (tmp.path / "dstlink").string(); + char a0[] = "cp", a1[] = "-a", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", link.c_str()); + std::snprintf(a3, sizeof(a3), "%s", dstlink.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + // Destination must be a symlink, not a followed copy of the target. 
+ EXPECT_TRUE(std::filesystem::is_symlink(std::filesystem::symlink_status(dstlink))); + auto t1 = cfbox::fs::read_symlink(link); + auto t2 = cfbox::fs::read_symlink(dstlink); + ASSERT_TRUE(t1.has_value() && t2.has_value()); + EXPECT_EQ(t1.value(), t2.value()); +} + +TEST(CpTest, ArchiveCopyBrokenSymlink) { + TempDir tmp; + auto link = (tmp.path / "broken").string(); + std::filesystem::create_symlink("/nonexistent/cfbox-target", link); + auto dst = (tmp.path / "dst").string(); + char a0[] = "cp", a1[] = "-a", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", link.c_str()); + std::snprintf(a3, sizeof(a3), "%s", dst.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + EXPECT_TRUE(std::filesystem::is_symlink(std::filesystem::symlink_status(dst))); +} + +TEST(CpTest, ArchiveCopyTree) { + TempDir tmp; + std::filesystem::create_directories(tmp.path / "srcdir" / "sub"); + auto a = tmp.write_file("srcdir/a.txt", "aaa"); + tmp.write_file("srcdir/sub/b.txt", "bbb"); + std::filesystem::permissions(a, std::filesystem::perms::owner_read | std::filesystem::perms::group_read); + auto srcdir = (tmp.path / "srcdir").string(); + auto dstdir = (tmp.path / "dstdir").string(); + char a0[] = "cp", a1[] = "-a", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", srcdir.c_str()); + std::snprintf(a3, sizeof(a3), "%s", dstdir.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + EXPECT_TRUE(std::filesystem::exists(tmp.path / "dstdir" / "a.txt")); + EXPECT_TRUE(std::filesystem::exists(tmp.path / "dstdir" / "sub" / "b.txt")); + EXPECT_EQ(mode_of((tmp.path / "dstdir" / "a.txt").string()), mode_of(a)); +} + +TEST(CpTest, ArchiveCopyDirectoryTimestamp) { + TempDir tmp; + std::filesystem::create_directories(tmp.path / "srcdir" / "sub"); + tmp.write_file("srcdir/sub/inner.txt", "x"); // child creation would reset mtime + auto srcdir = (tmp.path / "srcdir").string(); + auto dstdir = (tmp.path / "dstdir").string(); + set_mtime(srcdir, 
1'500'000'000); + char a0[] = "cp", a1[] = "-a", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", srcdir.c_str()); + std::snprintf(a3, sizeof(a3), "%s", dstdir.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + EXPECT_EQ(mtime_of(srcdir), mtime_of(dstdir)); +} + +TEST(CpTest, PreserveKeepsTimestamp) { + TempDir tmp; + auto src = tmp.write_file("src.txt", "data"); + set_mtime(src, 1'200'000'000); + auto dst = (tmp.path / "dst.txt").string(); + char a0[] = "cp", a1[] = "-p", a2[256], a3[256]; + std::snprintf(a2, sizeof(a2), "%s", src.c_str()); + std::snprintf(a3, sizeof(a3), "%s", dst.c_str()); + char* argv[] = {a0, a1, a2, a3}; + EXPECT_EQ(cp_main(4, argv), 0); + EXPECT_EQ(mtime_of(src), mtime_of(dst)); +} + #endif // CFBOX_ENABLE_CP From 967e185bb6aaf170bd50ce4df3b21226d252377c Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:27:32 +0800 Subject: [PATCH 02/21] docs(plan): record cp archive batch done --- document/ai/PLAN.md | 4 ++-- document/notes/2026-06-28-cp-archive.md | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 document/notes/2026-06-28-cp-archive.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 7916bb9..6bfba9e 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批2 `cp -a`(归档模式:保权限/属主/时间戳/symlink/递归)。 +> 焦点 → Phase 2 批3 `test` POSIX 三态(整数校验 + 退出码 0/1/2 + 补 -h/-nt/-ot/-ef + 递归下降)。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash 
tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -20,7 +20,7 @@ | 批 | 范围 | 状态 | Commit | 测试 | |----|------|------|--------|------| | 批1 | `tail -f/-F`(fd-based follow:fstat 轮询 + 64KiB quantum + -F drain-switch + SIGINT 退出 0) | ✅ | bff34e9 | 381/0 | -| 批2 | `cp -a`(归档模式:保权限/属主/时间戳/symlink/递归) | 🔄 NEXT | — | — | +| 批2 | `cp -a`(归档模式:保权限/属主/时间戳/symlink/递归) | ✅ | a3b89ed | 406/0 | | 批3 | `test` POSIX 子集(文件测试/字符串/整数/复合表达式,退出码语义) | ⏳ | — | — | | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ⏳ | — | — | | 批5+ | grep -A/-B/-C、find 布尔表达式、sh 深化(按运维频率排) | ⏳ | — | — | diff --git a/document/notes/2026-06-28-cp-archive.md b/document/notes/2026-06-28-cp-archive.md new file mode 100644 index 0000000..733a24a --- /dev/null +++ b/document/notes/2026-06-28-cp-archive.md @@ -0,0 +1,22 @@ +# 2026-06-28 — cp -a 归档模式(Phase 2 批2) + +## 背景 +coreutils `cp -a`(archive)= `-rd --preserve=all`:递归 + 保 mode/owner/time + 复制 symlink 本身(不跟随)。顺带修复既有 `-p` 名实不符(实现只保 mode,help 却声称保 mode/owner/time)。 + +## 设计决策 +- **lstat 驱动遍历,绝不跟随 symlink**:弃 `std::filesystem::copy`(默认跟随 symlink + 不保属性),自写 `copy_one_archive` 按 `S_ISLNK/S_ISDIR` 分发。这是 TOCTOU/symlink 高危面(PLAN GOTCHA #4)。 +- **symlink 复制**:`read_symlink` + `create_symlink` 复制链接本身;owner 用 `lchown`,time 用 `utimensat(AT_SYMLINK_NOFOLLOW)`——`utime(2)` 不支持 symlink。 +- **目录属性回填顺序**:先 `create_directory`(默认可写权限)→ 递归子项 → **最后** `preserve_attrs` 设 mode/owner/time。否则子项创建会刷新父目录 mtime,导致 -a 保时间戳失败(只在含子项目录暴露,单测易漏)。 +- **属性失败 non-fatal**:owner 在非 root 下 EPERM 常见,coreutils 也是警告不致命;仅内容复制失败计 rc=1。 +- **新增 fs 原语**:`fs::lstat`(link-aware)、`fs::set_times(path, timespec[2], no_follow)`。 +- **潜伏 bug 顺手修**:`fs_util.hpp` 用 `std::vector` 却从未 include `<vector>`(靠其他头文件的传递包含侥幸编译),gcc-16 收紧传递 include 后暴露。显式补 `<vector>`。 + +## 验证 +- GTest +7(ArchiveCopyPreservesMode/Timestamp/SymlinkAsLink/BrokenSymlink/Tree/DirectoryTimestamp + PreserveKeepsTimestamp),全量 **406/0** 绿。 +- 集成 test_cp.sh +`cp -a` 场景(结构/mode/symlink/broken),54 脚本全绿。 +- size-opt **422 KB**(基线 418 KB,+4 KB),≤550 KB 
红线。 + +## 陷阱(留给后续批/维护者) +- `-r`(非 `-a`)仍走 `copy_recursive`(跟随 symlink)——coreutils `-r` 本就如此,向后兼容未改;安全归档一律用 `-a`。 +- `st_atim/st_mtim` 是 POSIX 2008 字段,glibc/musl 可用;BSD 系 `st_atimespec` 需条件编译——cfbox Linux-only 暂无忧,交叉编译靠 CI cross 阶段兜底。 +- commit: `a3b89ed` From 0f9b3cb9f20af98bde459807e2e0e08735e7f3f2 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:38:47 +0800 Subject: [PATCH 03/21] feat(test): POSIX three-state exit codes and recursive-descent parser - integer operands validated via from_chars (abc -eq abc now exits 2) - recursive-descent parser: -o/-a/!/( ) with correct precedence - arity/syntax errors unified to exit 2 (bare -z, unknown -op, stray paren) - add -h/-b/-c/-p/-S file tests, -nt/-ot/-ef file comparisons, string < > - HELP.extra synced --- src/applets/test.cpp | 292 +++++++++++++++++++++------------ tests/integration/test_test.sh | 31 ++++ tests/unit/test_test.cpp | 91 +++++++++- 3 files changed, 310 insertions(+), 104 deletions(-) diff --git a/src/applets/test.cpp b/src/applets/test.cpp index 8de41ff..b929a5f 100644 --- a/src/applets/test.cpp +++ b/src/applets/test.cpp @@ -1,15 +1,16 @@ +#include +#include #include -#include -#include +#include #include #include #include +#include #include #include -#include -#include #include +#include namespace { constexpr cfbox::help::HelpEntry HELP = { @@ -18,19 +19,26 @@ constexpr cfbox::help::HelpEntry HELP = { .one_line = "evaluate conditional expression", .usage = "test EXPRESSION\n [ EXPRESSION ]", .options = "", - .extra = "String: -z STR, -n STR, STR1 = STR2, STR1 != STR2\n" + .extra = "String: -z STR, -n STR, STR1 = STR2, STR1 != STR2, STR1 < STR2, STR1 > STR2\n" "Integer: INT1 -eq/-ne/-lt/-le/-gt/-ge INT2\n" - "File: -e/-f/-d/-r/-w/-x/-s/-L FILE\n" + "File: -e/-f/-d/-r/-w/-x/-s/-L/-h/-b/-c/-p/-S FILE\n" + "File2: FILE1 -nt/-ot/-ef FILE2\n" "Logic: ! 
EXPR, EXPR -a EXPR, EXPR -o EXPR, ( EXPR )", }; using Args = std::vector; -// Forward declarations -auto eval_expr(const Args& args) -> int; - -auto to_int(std::string_view s) -> long { - return std::strtol(s.data(), nullptr, 10); +// Parse a base-10 integer; the whole token must be consumed (no trailing junk). +// Returns nullopt for "abc", "5x", "" — POSIX test treats these as exit-2 errors. +// (std::strtol would silently coerce "abc" -> 0, masking invalid operands.) +auto parse_int(std::string_view s) -> std::optional { + if (s.empty()) return std::nullopt; + long val = 0; + auto res = std::from_chars(s.data(), s.data() + s.size(), val); + if (res.ec != std::errc{} || res.ptr != s.data() + s.size()) { + return std::nullopt; + } + return val; } auto file_test(char op, std::string_view path) -> bool { @@ -43,137 +51,215 @@ auto file_test(char op, std::string_view path) -> bool { case 'w': return access(path.data(), W_OK) == 0; case 'x': return access(path.data(), X_OK) == 0; case 's': return stat(path.data(), &st) == 0 && st.st_size > 0; - case 'L': return lstat(path.data(), &st) == 0 && S_ISLNK(st.st_mode); + case 'L': case 'h': return lstat(path.data(), &st) == 0 && S_ISLNK(st.st_mode); + case 'b': return stat(path.data(), &st) == 0 && S_ISBLK(st.st_mode); + case 'c': return stat(path.data(), &st) == 0 && S_ISCHR(st.st_mode); + case 'p': return stat(path.data(), &st) == 0 && S_ISFIFO(st.st_mode); + case 'S': return stat(path.data(), &st) == 0 && S_ISSOCK(st.st_mode); default: return false; } } -// Evaluate with -o (lowest precedence) -auto eval_or(const Args& args) -> int; +// -nt: lhs exists and is newer (by mtime) than rhs; a missing rhs counts as +// infinitely old, so an existing lhs -nt a missing rhs is true. 
+auto newer_than(std::string_view lhs, std::string_view rhs) -> int { + struct stat s1 {}, s2 {}; + if (stat(lhs.data(), &s1) != 0) return 1; // lhs must exist + if (stat(rhs.data(), &s2) != 0) return 0; // rhs missing -> lhs newer + return s1.st_mtime > s2.st_mtime ? 0 : 1; +} -// Evaluate with -a (higher precedence than -o) -auto eval_and(const Args& args) -> int; +auto older_than(std::string_view lhs, std::string_view rhs) -> int { + struct stat s1 {}, s2 {}; + if (stat(lhs.data(), &s1) != 0) return 1; + if (stat(rhs.data(), &s2) != 0) return 0; + return s1.st_mtime < s2.st_mtime ? 0 : 1; +} -auto eval_or(const Args& args) -> int { - // Find first -o to split (lowest precedence) - for (std::size_t i = 0; i < args.size(); ++i) { - if (args[i] == "-o") { - int left = eval_or(Args(args.begin(), args.begin() + static_cast(i))); - if (left == 0) return 0; // short-circuit: left is true - return eval_and(Args(args.begin() + static_cast(i) + 1, args.end())); - } +// -ef: same device + inode (hard links or the same file). +auto same_file(std::string_view lhs, std::string_view rhs) -> int { + struct stat s1 {}, s2 {}; + if (stat(lhs.data(), &s1) != 0 || stat(rhs.data(), &s2) != 0) return 1; + return (s1.st_dev == s2.st_dev && s1.st_ino == s2.st_ino) ? 
0 : 1; +} + +auto is_unary_op(std::string_view s) -> bool { + if (s.size() != 2 || s[0] != '-') return false; + switch (s[1]) { + case 'z': case 'n': case 'e': case 'f': case 'd': case 'r': + case 'w': case 'x': case 's': case 'L': case 'h': + case 'b': case 'c': case 'p': case 'S': + return true; + default: return false; } - return eval_and(args); } -auto eval_and(const Args& args) -> int { - // Find first -a to split (higher precedence than -o) - for (std::size_t i = 0; i < args.size(); ++i) { - if (args[i] == "-a") { - int left = eval_and(Args(args.begin(), args.begin() + static_cast(i))); - if (left != 0) return 1; // short-circuit: left is false - return eval_expr(Args(args.begin() + static_cast(i) + 1, args.end())); - } +auto is_binary_op(std::string_view s) -> bool { + return s == "=" || s == "!=" || s == "<" || s == ">" + || s == "-eq" || s == "-ne" || s == "-lt" || s == "-le" + || s == "-gt" || s == "-ge" || s == "-nt" || s == "-ot" || s == "-ef"; +} + +auto eval_unary(char op, std::string_view arg) -> int { + if (op == 'z') return arg.empty() ? 0 : 1; + if (op == 'n') return arg.empty() ? 1 : 0; + return file_test(op, arg) ? 0 : 1; +} + +auto eval_binary(std::string_view lhs, std::string_view op, std::string_view rhs) -> int { + if (op == "=") return lhs == rhs ? 0 : 1; + if (op == "!=") return lhs != rhs ? 0 : 1; + if (op == "<") return lhs < rhs ? 0 : 1; // byte-wise (C locale) + if (op == ">") return lhs > rhs ? 0 : 1; + if (op == "-nt") return newer_than(lhs, rhs); + if (op == "-ot") return older_than(lhs, rhs); + if (op == "-ef") return same_file(lhs, rhs); + + // Integer comparison — both operands must be valid integers, else exit 2. + auto l = parse_int(lhs); + auto r = parse_int(rhs); + if (!l || !r) { + CFBOX_ERR("test", "integer expression expected"); + return 2; } - return eval_expr(args); + if (op == "-eq") return *l == *r ? 0 : 1; + if (op == "-ne") return *l != *r ? 0 : 1; + if (op == "-lt") return *l < *r ? 
0 : 1; + if (op == "-le") return *l <= *r ? 0 : 1; + if (op == "-gt") return *l > *r ? 0 : 1; + if (op == "-ge") return *l >= *r ? 0 : 1; + CFBOX_ERR("test", "unknown binary operator '%.*s'", + static_cast(op.size()), op.data()); + return 2; } -auto eval_expr(const Args& args) -> int { - if (args.empty()) return 1; - - // Parenthesized expression - if (args[0] == "(" && args.size() >= 3 && args.back() == ")") { - // Find matching close paren - int depth = 0; - for (std::size_t i = 0; i < args.size(); ++i) { - if (args[i] == "(") ++depth; - else if (args[i] == ")") { - --depth; - if (depth == 0 && i == args.size() - 1) { - return eval_or(Args(args.begin() + 1, args.end() - 1)); - } - } +// Recursive-descent evaluator over the token stream. Returns POSIX exit codes: +// 0 = true, 1 = false, 2 = syntax / operand error. +// Precedence: -o (lowest) < -a (implicit AND between primaries) < ! < primary. +struct Evaluator { + const Args& a; + std::size_t p = 0; + + [[nodiscard]] auto at_end() const -> bool { return p >= a.size(); } + auto cur() const -> std::string_view { return a[p]; } + + auto parse_or() -> int { + int r = parse_and(); + while (r != 2 && !at_end() && cur() == "-o") { + ++p; + int rhs = parse_and(); + if (rhs == 2) return 2; + r = (r == 0 || rhs == 0) ? 0 : 1; // OR } + return r; } - // ! EXPR - if (args[0] == "!" && args.size() > 1) { - return eval_or(Args(args.begin() + 1, args.end())) == 0 ? 1 : 0; + auto parse_and() -> int { + int r = parse_not(); + // Continue while there is another primary (explicit -a or implicit AND). + while (r != 2 && !at_end() && cur() != ")" && cur() != "-o") { + if (cur() == "-a") ++p; // explicit; otherwise adjacent primaries AND + int rhs = parse_not(); + if (rhs == 2) return 2; + r = (r == 0 && rhs == 0) ? 0 : 1; // AND + } + return r; } - // Unary tests (2 args) - if (args.size() == 2) { - if (args[0] == "-z") return args[1].empty() ? 0 : 1; - if (args[0] == "-n") return args[1].empty() ? 
1 : 0; - if (args[0].size() == 2 && args[0][0] == '-') { - char op = args[0][1]; - if (op == 'e' || op == 'f' || op == 'd' || op == 'r' || - op == 'w' || op == 'x' || op == 's' || op == 'L') { - return file_test(op, args[1]) ? 0 : 1; - } + auto parse_not() -> int { + // "!" is negation only when an operand follows; a trailing "!" is a + // non-empty string operand (POSIX: `test !` -> true). + if (!at_end() && cur() == "!" && p + 1 < a.size()) { + ++p; + int r = parse_not(); + return r == 2 ? 2 : (r == 0 ? 1 : 0); // NOT } - CFBOX_ERR("test", "unknown operator '%.*s'", static_cast(args[0].size()), args[0].data()); - return 2; + return parse_primary(); } - // Binary tests (3 args) - if (args.size() == 3) { - // String comparison - if (args[1] == "=") return args[0] == args[2] ? 0 : 1; - if (args[1] == "!=") return args[0] != args[2] ? 0 : 1; - - // Integer comparison - if (args[1] == "-eq") return to_int(args[0]) == to_int(args[2]) ? 0 : 1; - if (args[1] == "-ne") return to_int(args[0]) != to_int(args[2]) ? 0 : 1; - if (args[1] == "-lt") return to_int(args[0]) < to_int(args[2]) ? 0 : 1; - if (args[1] == "-le") return to_int(args[0]) <= to_int(args[2]) ? 0 : 1; - if (args[1] == "-gt") return to_int(args[0]) > to_int(args[2]) ? 0 : 1; - if (args[1] == "-ge") return to_int(args[0]) >= to_int(args[2]) ? 0 : 1; - - CFBOX_ERR("test", "unknown operator '%.*s'", static_cast(args[1].size()), args[1].data()); - return 2; - } + auto parse_primary() -> int { + if (at_end()) return 2; // missing operand - // Single arg: true if non-empty - if (args.size() == 1) { - return args[0].empty() ? 
1 : 0; - } + if (cur() == "(") { + ++p; + int r = parse_or(); + if (r == 2) return 2; + if (at_end() || cur() != ")") return 2; // unmatched "(" + ++p; + return r; + } - // Fallback: try compound expression evaluation - return eval_or(args); -} + // Unary: -OP ARG + if (is_unary_op(cur())) { + if (p + 1 >= a.size()) { + CFBOX_ERR("test", "missing argument for '%.*s'", + static_cast(cur().size()), cur().data()); + return 2; + } + char op = cur()[1]; + std::string_view arg = a[p + 1]; + p += 2; + return eval_unary(op, arg); + } + // Binary: ARG OP ARG + if (p + 2 < a.size() && is_binary_op(a[p + 1])) { + std::string_view lhs = a[p]; + std::string_view op = a[p + 1]; + std::string_view rhs = a[p + 2]; + p += 3; + return eval_binary(lhs, op, rhs); + } + + // An unrecognized -X (alpha) is a syntax error, never a string operand. + if (cur().size() >= 2 && cur()[0] == '-' && + std::isalpha(static_cast(cur()[1]))) { + CFBOX_ERR("test", "unknown operator '%.*s'", + static_cast(cur().size()), cur().data()); + return 2; + } + + // Single argument: true if non-empty. + std::string_view s = cur(); + ++p; + return s.empty() ? 1 : 0; + } +}; } // namespace auto test_main(int argc, char* argv[]) -> int { - // Handle --help/--version before parsing - // test uses positional args only, so we do manual check + // test uses positional args only; intercept --help/--version manually. for (int i = 1; i < argc; ++i) { std::string_view arg{argv[i]}; if (arg == "--help") { cfbox::help::print_help(HELP); return 0; } if (arg == "--version") { cfbox::help::print_version(HELP); return 0; } } - // Build expression from args (skip argv[0]) Args expr_args; for (int i = 1; i < argc; ++i) { expr_args.emplace_back(argv[i]); } - // If invoked as "[", last arg must be "]" + // If invoked as "[", the last argument must be "]". 
std::string_view prog{argv[0]}; - if (!prog.empty() && prog.back() == '[') { - // Also match when basename is "[" - auto slash = prog.rfind('/'); - auto base = (slash != std::string_view::npos) ? prog.substr(slash + 1) : prog; - if (base == "[" || base == "[") { - if (expr_args.empty() || expr_args.back() != "]") { - CFBOX_ERR("[", "missing ']'"); - return 2; - } - expr_args.pop_back(); + auto slash = prog.rfind('/'); + auto base = (slash != std::string_view::npos) ? prog.substr(slash + 1) : prog; + if (base == "[") { + if (expr_args.empty() || expr_args.back() != "]") { + CFBOX_ERR("[", "missing ']'"); + return 2; } + expr_args.pop_back(); } - return eval_expr(expr_args); + // POSIX: an empty expression ([ ] or bare test) is false, not an error. + if (expr_args.empty()) return 1; + + Evaluator ev{expr_args, 0}; + int result = ev.parse_or(); + if (!ev.at_end()) { + // Leftover tokens (e.g. a stray ")") mean the expression was malformed. + return 2; + } + return result; } diff --git a/tests/integration/test_test.sh b/tests/integration/test_test.sh index 728ee7c..f4f3712 100755 --- a/tests/integration/test_test.sh +++ b/tests/integration/test_test.sh @@ -3,6 +3,8 @@ set -euo pipefail source "$(dirname "$0")/helpers.sh" pass=0 fail=0 +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT run_test() { local name="$1" expected="$2"; shift 2 @@ -59,5 +61,34 @@ run_test() { # Bracket form "$CFBOX" [ "abc" = "abc" ] ; ((++pass)) || { echo "FAIL: [ ] form"; ((++fail)); } +# --- POSIX three-state exit codes: 0 true / 1 false / 2 error --- +# invalid integer operand -> 2 +assert_exit 2 test abc -eq abc && ((++pass)) || { echo "FAIL: abc -eq abc should be 2"; ((++fail)); } +assert_exit 2 test 5 -eq 5x && ((++pass)) || { echo "FAIL: 5 -eq 5x should be 2"; ((++fail)); } + +# bare unary op without operand -> 2 +assert_exit 2 test -z && ((++pass)) || { echo "FAIL: bare -z should be 2"; ((++fail)); } + +# unknown operator -> 2 +assert_exit 2 test -q foo && ((++pass)) || { echo "FAIL: 
-q should be 2"; ((++fail)); } + +# stray close paren -> 2 +assert_exit 2 test a ')' && ((++pass)) || { echo "FAIL: stray ) should be 2"; ((++fail)); } + +# -h alias for -L (symlink) +ln -s /dev/null "$tmpdir/link" +assert_exit 0 test -h "$tmpdir/link" && ((++pass)) || { echo "FAIL: -h symlink"; ((++fail)); } + +# string < > (byte order) +assert_exit 0 test "a" "<" "b" && ((++pass)) || { echo "FAIL: str <"; ((++fail)); } +assert_exit 1 test "b" "<" "a" && ((++pass)) || { echo "FAIL: str < false"; ((++fail)); } + +# -ef same file +echo x > "$tmpdir/a.txt" +assert_exit 0 test "$tmpdir/a.txt" -ef "$tmpdir/a.txt" && ((++pass)) || { echo "FAIL: -ef same file"; ((++fail)); } + +# nested parens with OR +assert_exit 0 test '(' a = a -o b = c ')' && ((++pass)) || { echo "FAIL: nested parens OR"; ((++fail)); } + echo "test: $pass passed, $fail failed" [[ $fail -eq 0 ]] diff --git a/tests/unit/test_test.cpp b/tests/unit/test_test.cpp index 4f0f68c..35b2582 100644 --- a/tests/unit/test_test.cpp +++ b/tests/unit/test_test.cpp @@ -1,5 +1,9 @@ -#include +#include +#include + #include +#include +#include #include #include "test_capture.hpp" @@ -100,4 +104,89 @@ TEST(TestApplet, OrOperator) { EXPECT_EQ(test_main(8, argv), 0); } +// --- integer validation: invalid operands must exit 2, not silently coerce --- +TEST(TestApplet, IntInvalidNonNumeric) { + char a0[] = "test", a1[] = "abc", a2[] = "-eq", a3[] = "abc"; + char* argv[] = {a0, a1, a2, a3, nullptr}; + EXPECT_EQ(test_main(4, argv), 2); +} + +TEST(TestApplet, IntInvalidTrailingJunk) { + char a0[] = "test", a1[] = "5", a2[] = "-eq", a3[] = "5x"; + char* argv[] = {a0, a1, a2, a3, nullptr}; + EXPECT_EQ(test_main(4, argv), 2); +} + +// --- operand/arity errors -> exit 2 --- +TEST(TestApplet, BareUnaryNoOperand) { + char a0[] = "test", a1[] = "-z"; + char* argv[] = {a0, a1, nullptr}; + EXPECT_EQ(test_main(2, argv), 2); +} + +TEST(TestApplet, UnknownOperator) { + char a0[] = "test", a1[] = "-q", a2[] = "foo"; + char* argv[] = {a0, 
a1, a2, nullptr}; + EXPECT_EQ(test_main(3, argv), 2); +} + +TEST(TestApplet, UnmatchedOpenParen) { + char a0[] = "test", a1[] = "(", a2[] = "a"; + char* argv[] = {a0, a1, a2, nullptr}; + EXPECT_EQ(test_main(3, argv), 2); +} + +TEST(TestApplet, StrayCloseParen) { + char a0[] = "test", a1[] = "a", a2[] = ")"; + char* argv[] = {a0, a1, a2, nullptr}; + EXPECT_EQ(test_main(3, argv), 2); +} + +// --- trailing "!" is a non-empty string operand (POSIX: `test !` -> true) --- +TEST(TestApplet, TrailingBangIsString) { + char a0[] = "test", a1[] = "!"; + char* argv[] = {a0, a1, nullptr}; + EXPECT_EQ(test_main(2, argv), 0); +} + +// --- new operators: string < >, file -nt/-ef --- +TEST(TestApplet, StringLessThan) { + char a0[] = "test", a1[] = "a", a2[] = "<", a3[] = "b"; + char* argv[] = {a0, a1, a2, a3, nullptr}; + EXPECT_EQ(test_main(4, argv), 0); +} + +TEST(TestApplet, NestedOrInParens) { + char a0[] = "test", a1[] = "(", a2[] = "a", a3[] = "=", a4[] = "a", + a5[] = "-o", a6[] = "b", a7[] = "=", a8[] = "c", a9[] = ")"; + char* argv[] = {a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, nullptr}; + EXPECT_EQ(test_main(10, argv), 0); // (a=a OR b=c) -> true +} + +TEST(TestApplet, EfSameFile) { + TempDir tmp; + auto f = tmp.write_file("a.txt", "x"); + char a0[] = "test"; + char a1[256], a2[] = "-ef", a3[256]; + std::snprintf(a1, sizeof(a1), "%s", f.c_str()); + std::snprintf(a3, sizeof(a3), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3, nullptr}; + EXPECT_EQ(test_main(4, argv), 0); // same file -ef -> true +} + +TEST(TestApplet, NewerThan) { + TempDir tmp; + auto old_f = tmp.write_file("old.txt", "x"); + auto new_f = tmp.write_file("new.txt", "y"); + struct timespec past[2] = {{1, 0}, {1, 0}}; + auto r = cfbox::fs::set_times(old_f, past, /*no_follow=*/false); + (void)r; + char a0[] = "test"; + char a1[256], a2[] = "-nt", a3[256]; + std::snprintf(a1, sizeof(a1), "%s", new_f.c_str()); + std::snprintf(a3, sizeof(a3), "%s", old_f.c_str()); + char* argv[] = {a0, a1, a2, a3, nullptr}; + 
EXPECT_EQ(test_main(4, argv), 0); // new -nt old -> true +} + #endif // CFBOX_ENABLE_TEST From bbac8920896aa2fc0565220105250e9e98260b1a Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:40:30 +0800 Subject: [PATCH 04/21] docs(plan): record test POSIX batch done --- document/ai/PLAN.md | 4 ++-- document/notes/2026-06-28-test-posix.md | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 document/notes/2026-06-28-test-posix.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 6bfba9e..83fa192 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批3 `test` POSIX 三态(整数校验 + 退出码 0/1/2 + 补 -h/-nt/-ot/-ef + 递归下降)。 +> 焦点 → Phase 2 批4 `ls -R` 递归 + `--color` 三态(固定色,LS_COLORS 延后)。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -21,7 +21,7 @@ |----|------|------|--------|------| | 批1 | `tail -f/-F`(fd-based follow:fstat 轮询 + 64KiB quantum + -F drain-switch + SIGINT 退出 0) | ✅ | bff34e9 | 381/0 | | 批2 | `cp -a`(归档模式:保权限/属主/时间戳/symlink/递归) | ✅ | a3b89ed | 406/0 | -| 批3 | `test` POSIX 子集(文件测试/字符串/整数/复合表达式,退出码语义) | ⏳ | — | — | +| 批3 | `test` POSIX 子集(文件测试/字符串/整数/复合表达式,退出码语义) | ✅ | 0f9b3cb | 417/0 | | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ⏳ | — | — | | 批5+ | grep -A/-B/-C、find 布尔表达式、sh 深化(按运维频率排) | ⏳ | — | — | diff --git a/document/notes/2026-06-28-test-posix.md b/document/notes/2026-06-28-test-posix.md new file mode 100644 
index 0000000..9ec6079 --- /dev/null +++ b/document/notes/2026-06-28-test-posix.md @@ -0,0 +1,23 @@ +# 2026-06-28 — test POSIX 三态 + 递归下降(Phase 2 批3) + +## 背景 +test 退出码偏离 POSIX:`to_int` 用 strtol 不校验,`abc -eq abc` 误判 0(应 2);裸 `-z` 无操作数当单参非空→true(应 2);文件操作符仅 8 个;解析靠"线性扫第一个 -o/-a 切分",对复合表达式脆弱。 + +## 设计决策 +- **递归下降解析器**(`Evaluator` struct + token 游标 `p`):`parse_or → parse_and`(隐式 AND:相邻 primary 自动 AND)`→ parse_not → parse_primary`。三态 int 返回(0 true / 1 false / 2 error),OR/AND/NOT 各自合并规则。 +- **整数严格校验**:弃 strtol,改 `std::from_chars` + 全串消费检查。`abc`/`5x`/空 → nullopt → exit 2。 +- **arity/语法错误统一 exit 2**:裸一元操作符无操作数、未知 `-X`(alpha) 操作符、未配对 `(`/多余 `)`、表达式尾部残留 token。 +- **操作符扩充**:文件 `-h`(=-L 别名)/`-b`/`-c`/`-p`/`-S`;文件比较 `-nt`/`-ot`(mtime)/`-ef`(同 dev+inode);字符串 `<`/`>`(字节序)。 +- **`!` 的双语义**:有后继时是否定;无后继(`test !`)是非空字符串操作数→true(POSIX)。 +- **顶层空表达式**(`[ ]`/裸 test)→ false(1),非 error;子表达式空(`()`)→ error(2)。 + +## 验证 +- GTest +11,全量 **417/0**。 +- 集成 test_test.sh 改造:引入 `assert_exit` 精确断 0/1/2(原 `&&/||` 会吞 exit 2),+14 case,31 passed。 +- size-opt **422 KB**(持平——递归下降替代线性切分抵消新操作符体积)。 + +## 陷阱 +- `test -5`(负数单参)→ true(0):第二字符非 alpha 不触发"未知操作符",当非空字符串(符合 coreutils)。负数作整数操作数 `-5 -eq 5` 正常(二元检测在先)。 +- `<`/`>` 在 shell 需转义/引号,是 shell 词法非 test 职责;操作数恰好是 `-a`/`-o`/`-e` 等需引号(POSIX test 已知限制)。 +- 未补 `-g/-u/-k/-O/-G/-t`(setuid 位/tty,低频),按 POSIX 子集后置。 +- commit: `0f9b3cb` From 6dfe3295866ed73ae0eece5484a857b6e393a1b5 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:51:15 +0800 Subject: [PATCH 05/21] feat(ls): add -R recursive listing and --color, fix perm format - -R/--recursive: GNU-style blocks with 'path:' headers, blank-line separated - --color[=always|auto|never]: type-based ANSI colors (isatty gates auto) - symlinked directories not descended into (cycle-safe) - extract print_entry/collect_visible to dedup long/short/single-file paths - fix format_permissions emitting a stray '-' before the type char (ls -l showed 'd-rwxr-xr-x' instead of 'drwxr-xr-x') 
--- src/applets/ls.cpp | 318 ++++++++++++++++++++--------------- tests/integration/test_ls.sh | 30 ++++ tests/unit/test_ls.cpp | 95 ++++++++++- 3 files changed, 304 insertions(+), 139 deletions(-) diff --git a/src/applets/ls.cpp b/src/applets/ls.cpp index c8403f4..cd21dc3 100644 --- a/src/applets/ls.cpp +++ b/src/applets/ls.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -35,20 +36,18 @@ auto format_size_human(std::uintmax_t bytes) -> std::string { } auto format_permissions(std::filesystem::perms p) -> std::string { - char buf[11]; - buf[0] = '-'; // will be overridden for special types - - buf[1] = (p & std::filesystem::perms::owner_read) != std::filesystem::perms::none ? 'r' : '-'; - buf[2] = (p & std::filesystem::perms::owner_write) != std::filesystem::perms::none ? 'w' : '-'; - buf[3] = (p & std::filesystem::perms::owner_exec) != std::filesystem::perms::none ? 'x' : '-'; - buf[4] = (p & std::filesystem::perms::group_read) != std::filesystem::perms::none ? 'r' : '-'; - buf[5] = (p & std::filesystem::perms::group_write) != std::filesystem::perms::none ? 'w' : '-'; - buf[6] = (p & std::filesystem::perms::group_exec) != std::filesystem::perms::none ? 'x' : '-'; - buf[7] = (p & std::filesystem::perms::others_read) != std::filesystem::perms::none ? 'r' : '-'; - buf[8] = (p & std::filesystem::perms::others_write) != std::filesystem::perms::none ? 'w' : '-'; - buf[9] = (p & std::filesystem::perms::others_exec) != std::filesystem::perms::none ? 'x' : '-'; - buf[10] = '\0'; - return std::string{buf, 10}; + char buf[10]; + buf[0] = (p & std::filesystem::perms::owner_read) != std::filesystem::perms::none ? 'r' : '-'; + buf[1] = (p & std::filesystem::perms::owner_write) != std::filesystem::perms::none ? 'w' : '-'; + buf[2] = (p & std::filesystem::perms::owner_exec) != std::filesystem::perms::none ? 'x' : '-'; + buf[3] = (p & std::filesystem::perms::group_read) != std::filesystem::perms::none ? 
'r' : '-'; + buf[4] = (p & std::filesystem::perms::group_write) != std::filesystem::perms::none ? 'w' : '-'; + buf[5] = (p & std::filesystem::perms::group_exec) != std::filesystem::perms::none ? 'x' : '-'; + buf[6] = (p & std::filesystem::perms::others_read) != std::filesystem::perms::none ? 'r' : '-'; + buf[7] = (p & std::filesystem::perms::others_write) != std::filesystem::perms::none ? 'w' : '-'; + buf[8] = (p & std::filesystem::perms::others_exec) != std::filesystem::perms::none ? 'x' : '-'; + buf[9] = '\0'; + return std::string{buf, 9}; // type char is prepended by the caller } auto format_type_char(std::filesystem::file_type type) -> char { @@ -94,152 +93,151 @@ auto group_of(gid_t gid) -> std::string { return std::to_string(gid); } +enum class ColorMode { Never, Auto, Always }; + struct LsOptions { bool all = false; // -a bool long_format = false; // -l bool human = false; // -h + bool recursive = false; // -R }; -auto list_directory(const std::string& path, const LsOptions& opts) -> int { - auto entries_result = cfbox::fs::directory_entries(path); - if (!entries_result) { - CFBOX_ERR("ls", "cannot access '%s': %s", path.c_str(), entries_result.error().msg.c_str()); - return 1; +// ANSI SGR code for a file type, or nullptr when no coloring applies. Type-based +// fixed colors only (LS_COLORS glob parsing is deferred to control binary size). 
+auto color_code(std::filesystem::file_type type, std::filesystem::perms perms, bool use_color) -> const char* { + if (!use_color) return nullptr; + using ft = std::filesystem::file_type; + using pm = std::filesystem::perms; + switch (type) { + case ft::directory: return "01;34"; // bold blue + case ft::symlink: return "01;36"; // bold cyan + case ft::fifo: return "33"; // yellow + case ft::socket: return "01;35"; // bold magenta + case ft::block: + case ft::character: return "01;33"; // bold yellow + case ft::regular: { + bool exec = (perms & (pm::owner_exec | pm::group_exec | pm::others_exec)) != pm::none; + return exec ? "01;32" : nullptr; // bold green + } + default: return nullptr; } +} - auto& entries = entries_result.value(); +auto colorize(const std::string& name, std::filesystem::file_type type, + std::filesystem::perms perms, bool use_color) -> std::string { + const char* code = color_code(type, perms, use_color); + if (!code) return name; + return "\033[" + std::string{code} + "m" + name + "\033[0m"; +} - // Filter hidden files if -a not set +// Read a directory, drop hidden entries (unless -a), and sort by name. 
+auto collect_visible(const std::string& path, const LsOptions& opts) + -> cfbox::base::Result> { + auto entries = cfbox::fs::directory_entries(path); + if (!entries) return std::unexpected(entries.error()); std::vector visible; - visible.reserve(entries.size()); - for (const auto& e : entries) { + visible.reserve(entries->size()); + for (const auto& e : *entries) { std::string name = e.path().filename().string(); - if (!opts.all && !name.empty() && name[0] == '.') - continue; + if (!opts.all && !name.empty() && name[0] == '.') continue; visible.push_back(e); } + std::sort(visible.begin(), visible.end(), + [](const std::filesystem::directory_entry& a, const std::filesystem::directory_entry& b) { + return a.path().filename().string() < b.path().filename().string(); + }); + return visible; +} - // Sort entries - std::sort( - visible.begin(), visible.end(), - [](const std::filesystem::directory_entry& a, const std::filesystem::directory_entry& b) { - return a.path().filename().string() < b.path().filename().string(); - }); - - if (opts.long_format) { - for (const auto& e : visible) { - auto status_result = cfbox::fs::symlink_status(e.path().string()); - if (!status_result) - continue; - auto& st = status_result.value(); - - char type_char = format_type_char(st.type()); - auto perms = format_permissions(st.permissions()); - perms.insert(perms.begin(), type_char); +// Print a single entry (file or directory member) in short or long form. +// The name is colorized when use_color is set; the symlink "-> target" suffix +// in long form is appended after the colorized name. +auto print_entry(const std::string& path, const LsOptions& opts, bool use_color) -> void { + auto st_r = cfbox::fs::symlink_status(path); + std::filesystem::file_type type = st_r ? st_r->type() : std::filesystem::file_type::none; + std::filesystem::perms perms = st_r ? 
st_r->permissions() : std::filesystem::perms::none; + std::string name = std::filesystem::path{path}.filename().string(); + std::string display = colorize(name, type, perms, use_color); + + if (!opts.long_format) { + std::printf("%s\n", display.c_str()); + return; + } - auto nlinks_result = cfbox::fs::hard_link_count(e.path().string()); - std::uintmax_t nlinks = nlinks_result.value_or(1); + char type_char = format_type_char(type); + auto perm_str = format_permissions(perms); + perm_str.insert(perm_str.begin(), type_char); - std::uintmax_t size = 0; - if (st.type() == std::filesystem::file_type::regular) { - auto sz = cfbox::fs::file_size(e.path().string()); - size = sz.value_or(0); - } + auto nlinks = cfbox::fs::hard_link_count(path).value_or(1); + std::uintmax_t size = 0; + if (type == std::filesystem::file_type::regular) { + size = cfbox::fs::file_size(path).value_or(0); + } + auto time_r = cfbox::fs::last_write_time(path); + std::string time_str = time_r ? format_time(*time_r) : ""; + std::string size_str = opts.human ? format_size_human(size) : std::to_string(size); + + std::string owner = "?"; + std::string group = "?"; + struct stat lst {}; + if (::lstat(path.c_str(), &lst) == 0) { + owner = owner_of(lst.st_uid); + group = group_of(lst.st_gid); + } - auto time_result = cfbox::fs::last_write_time(e.path().string()); - std::string time_str = time_result ? format_time(*time_result) : ""; + if (type == std::filesystem::file_type::symlink) { + std::error_code ec; + auto target = std::filesystem::read_symlink(std::filesystem::path{path}, ec); + if (!ec) display += " -> " + target.string(); + } - std::string size_str; - if (opts.human) { - size_str = format_size_human(size); - } else { - size_str = std::to_string(size); - } + std::printf("%s %3ju %-8s %-8s %*s %s %s\n", perm_str.c_str(), + static_cast(nlinks), owner.c_str(), group.c_str(), + opts.human ? 
5 : 8, size_str.c_str(), time_str.c_str(), display.c_str()); +} - std::string name = e.path().filename().string(); - std::string owner = "?"; - std::string group = "?"; - struct stat lst; - if (::lstat(e.path().string().c_str(), &lst) == 0) { - owner = owner_of(lst.st_uid); - group = group_of(lst.st_gid); - } - if (st.type() == std::filesystem::file_type::symlink) { - std::error_code ec; - auto target = std::filesystem::read_symlink(e.path(), ec); - if (!ec) { - name += " -> " + target.string(); - } - } +auto list_directory(const std::string& path, const LsOptions& opts, bool use_color) -> int { + auto visible = collect_visible(path, opts); + if (!visible) { + CFBOX_ERR("ls", "cannot access '%s': %s", path.c_str(), visible.error().msg.c_str()); + return 1; + } + for (const auto& e : *visible) { + print_entry(e.path().string(), opts, use_color); + } + return 0; +} - std::printf("%s %3ju %-8s %-8s %*s %s %s\n", perms.c_str(), - static_cast(nlinks), owner.c_str(), group.c_str(), - opts.human ? 5 : 8, size_str.c_str(), time_str.c_str(), name.c_str()); - } - } else { - for (const auto& e : visible) { - std::printf("%s\n", e.path().filename().string().c_str()); +// GNU-style recursive listing: each directory is its own block with a "path:" +// header, blocks separated by a blank line. Symlinked directories are NOT +// descended into (matches GNU ls and prevents cycles). 
+auto list_recursive(const std::string& path, const LsOptions& opts, bool use_color, bool leading_blank) -> int { + if (leading_blank) std::printf("\n"); + std::printf("%s:\n", path.c_str()); + int rc = list_directory(path, opts, use_color); + + auto visible = collect_visible(path, opts); + if (!visible) return rc; + for (const auto& e : *visible) { + auto st = cfbox::fs::symlink_status(e.path().string()); + if (st && st->type() == std::filesystem::file_type::directory) { + if (list_recursive(e.path().string(), opts, use_color, true) != 0) rc = 1; } } - - return 0; + return rc; } -auto list_path(const std::string& path, const LsOptions& opts, bool show_header) -> int { +auto list_path(const std::string& path, const LsOptions& opts, bool use_color, bool show_header) -> int { if (!cfbox::fs::exists(path)) { CFBOX_ERR("ls", "cannot access '%s': No such file or directory", path.c_str()); return 1; } - if (!cfbox::fs::is_directory(path)) { - // Single file - if (opts.long_format) { - auto status_result = cfbox::fs::symlink_status(path); - if (!status_result) { - CFBOX_ERR("ls", "%s", status_result.error().msg.c_str()); - return 1; - } - auto& st = status_result.value(); - char type_char = format_type_char(st.type()); - auto perms = format_permissions(st.permissions()); - perms.insert(perms.begin(), type_char); - - auto nlinks_result = cfbox::fs::hard_link_count(path); - std::uintmax_t nlinks = nlinks_result.value_or(1); - - std::uintmax_t size = 0; - if (st.type() == std::filesystem::file_type::regular) { - auto sz = cfbox::fs::file_size(path); - size = sz.value_or(0); - } - - auto time_result = cfbox::fs::last_write_time(path); - std::string time_str = time_result ? format_time(*time_result) : ""; - - std::string size_str = opts.human ? 
format_size_human(size) : std::to_string(size); - std::string name = std::filesystem::path{path}.filename().string(); - std::string owner = "?"; - std::string group = "?"; - struct stat lst; - if (::lstat(path.c_str(), &lst) == 0) { - owner = owner_of(lst.st_uid); - group = group_of(lst.st_gid); - } - - std::printf("%s %3ju %-8s %-8s %*s %s %s\n", perms.c_str(), - static_cast(nlinks), owner.c_str(), group.c_str(), - opts.human ? 5 : 8, size_str.c_str(), time_str.c_str(), name.c_str()); - } else { - auto fname = std::filesystem::path{path}.filename().string(); - std::printf("%s\n", fname.c_str()); - } + print_entry(path, opts, use_color); return 0; } - - if (show_header) { - std::printf("%s:\n", path.c_str()); - } - return list_directory(path, opts); + if (show_header) std::printf("%s:\n", path.c_str()); + return list_directory(path, opts, use_color); } constexpr cfbox::help::HelpEntry HELP = { @@ -247,9 +245,11 @@ constexpr cfbox::help::HelpEntry HELP = { .version = CFBOX_VERSION_STRING, .one_line = "list directory contents", .usage = "ls [OPTIONS] [FILE]...", - .options = " -a do not ignore entries starting with .\n" - " -l use a long listing format\n" - " -h print sizes in human readable format", + .options = " -a, --all do not ignore entries starting with .\n" + " -l, --long use a long listing format\n" + " -h, --human-readable print sizes in human readable format\n" + " -R, --recursive list subdirectories recursively\n" + " --color[=WHEN] colorize output (always/auto/never)", .extra = "", }; @@ -261,6 +261,10 @@ auto ls_main(int argc, char* argv[]) -> int { cfbox::args::OptSpec{'a', false, "all"}, cfbox::args::OptSpec{'l', false, "long"}, cfbox::args::OptSpec{'h', false, "human-readable"}, + cfbox::args::OptSpec{'R', false, "recursive"}, + // --color is long-only with an optional value; left + // unregistered so parse records it without eating the + // next positional argument as its value. 
}); if (parsed.has_long("help")) { @@ -276,19 +280,57 @@ auto ls_main(int argc, char* argv[]) -> int { opts.all = parsed.has('a'); opts.long_format = parsed.has('l'); opts.human = parsed.has('h'); + opts.recursive = parsed.has('R') || parsed.has_long("recursive"); + + // --color: a bare flag means "always"; otherwise honor always/auto/never. + ColorMode color = ColorMode::Auto; + if (parsed.has_long("color")) { + auto v = parsed.get_long("color"); + if (!v || *v == "always") color = ColorMode::Always; + else if (*v == "never") color = ColorMode::Never; + else if (*v == "auto") color = ColorMode::Auto; + else { + CFBOX_ERR("ls", "invalid argument '%.*s' for '--color'", + static_cast(v->size()), v->data()); + return 2; + } + } + bool use_color = (color == ColorMode::Always) || + (color == ColorMode::Auto && ::isatty(STDOUT_FILENO) == 1); const auto& pos = parsed.positional(); - bool multi = pos.size() > 1; - if (pos.empty()) { - return list_path(".", opts, false); + if (opts.recursive) { + if (pos.empty()) { + return list_recursive(".", opts, use_color, /*leading_blank=*/false); + } + int rc = 0; + bool first = true; + for (const auto& p : pos) { + std::string path{p}; + if (!cfbox::fs::exists(path)) { + CFBOX_ERR("ls", "cannot access '%s': No such file or directory", path.c_str()); + rc = 1; + first = false; + continue; + } + if (!cfbox::fs::is_directory(path)) { + print_entry(path, opts, use_color); + } else { + if (list_recursive(path, opts, use_color, /*leading_blank=*/!first) != 0) rc = 1; + } + first = false; + } + return rc; } + bool multi = pos.size() > 1; + if (pos.empty()) { + return list_path(".", opts, use_color, false); + } int rc = 0; for (const auto& p : pos) { - if (list_path(std::string{p}, opts, multi) != 0) { - rc = 1; - } + if (list_path(std::string{p}, opts, use_color, multi) != 0) rc = 1; } return rc; } diff --git a/tests/integration/test_ls.sh b/tests/integration/test_ls.sh index 9bdb44c..c060133 100755 --- a/tests/integration/test_ls.sh +++ 
b/tests/integration/test_ls.sh @@ -88,5 +88,35 @@ else ((++fail)) fi +# ls -R recursive into nested dirs +mkdir -p "$tmpdir/recur/sub/nested" +echo "top" > "$tmpdir/recur/top.txt" +echo "deep" > "$tmpdir/recur/sub/nested/deep.txt" +actual=$("$CFBOX" ls -R "$tmpdir/recur") +if [[ "$actual" == *"top.txt"* ]] && [[ "$actual" == *"deep.txt"* ]] && [[ "$actual" == *"sub"* ]]; then + ((++pass)) +else + echo "FAIL [ls -R]: got $(printf '%q' "$actual")" + ((++fail)) +fi + +# ls --color=always emits ANSI even when piped (not a tty) +actual=$("$CFBOX" ls --color=always "$tmpdir/lsdir") +if [[ "$actual" == *$'\033'* ]]; then + ((++pass)) +else + echo "FAIL [ls --color=always should emit ANSI]" + ((++fail)) +fi + +# ls --color=auto piped (not a tty) -> no ANSI +actual=$("$CFBOX" ls --color=auto "$tmpdir/lsdir") +if [[ "$actual" != *$'\033'* ]]; then + ((++pass)) +else + echo "FAIL [ls --color=auto piped should have no ANSI]" + ((++fail)) +fi + echo "ls: $pass passed, $fail failed" [[ $fail -eq 0 ]] diff --git a/tests/unit/test_ls.cpp b/tests/unit/test_ls.cpp index 732019e..437b4d0 100644 --- a/tests/unit/test_ls.cpp +++ b/tests/unit/test_ls.cpp @@ -1,7 +1,10 @@ +#include +#include + +#include #include #include #include "test_capture.hpp" -#include #if CFBOX_ENABLE_LS @@ -75,4 +78,94 @@ TEST(LsTest, SingleFileNoDirectory) { EXPECT_EQ(out, "only.txt\n"); } +TEST(LsTest, RecursiveListsNestedDirs) { + TempDir tmp; + std::filesystem::create_directories(tmp.path / "a" / "b"); + tmp.write_file("a/d.txt", ""); + tmp.write_file("a/b/c.txt", ""); + auto a = (tmp.path / "a").string(); + char a0[] = "ls", a1[] = "-R", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", a.c_str()); + char* argv[] = {a0, a1, a2}; + auto out = capture_stdout([&]{ return ls_main(3, argv); }); + EXPECT_NE(out.find("d.txt"), std::string::npos); // top-level entry + EXPECT_NE(out.find("c.txt"), std::string::npos); // proves descent into a/b +} + +TEST(LsTest, RecursiveDoesNotFollowSymlinkDir) { + TempDir tmp; + 
std::filesystem::create_directories(tmp.path / "real"); + tmp.write_file("real/f.txt", ""); + std::filesystem::create_symlink(tmp.path / "real", tmp.path / "link"); + auto top = tmp.path.string(); + char a0[] = "ls", a1[] = "-R", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", top.c_str()); + char* argv[] = {a0, a1, a2}; + auto out = capture_stdout([&]{ return ls_main(3, argv); }); + // f.txt must appear exactly once (under real/); following the symlink would + // duplicate it via the link -> real descent. + auto count = [](const std::string& s, const std::string& sub) { + int n = 0; + std::string::size_type p = 0; + while ((p = s.find(sub, p)) != std::string::npos) { ++n; p += sub.size(); } + return n; + }; + EXPECT_EQ(count(out, "f.txt"), 1); +} + +TEST(LsTest, RecursivePlusLong) { + TempDir tmp; + std::filesystem::create_directories(tmp.path / "a" / "b"); + tmp.write_file("a/f.txt", ""); + auto a = (tmp.path / "a").string(); + char a0[] = "ls", a1[] = "-Rl", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", a.c_str()); + char* argv[] = {a0, a1, a2}; + auto out = capture_stdout([&]{ return ls_main(3, argv); }); + EXPECT_NE(out.find("drwx"), std::string::npos); // directory long-format lines + EXPECT_NE(out.find("-rw"), std::string::npos); // regular file f.txt +} + +TEST(LsTest, ColorAlwaysWrapsDirectory) { + TempDir tmp; + std::filesystem::create_directory(tmp.path / "sub"); + auto dir = tmp.path.string(); + char a0[] = "ls", a1[] = "--color=always", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", dir.c_str()); + char* argv[] = {a0, a1, a2}; + auto out = capture_stdout([&]{ return ls_main(3, argv); }); + EXPECT_NE(out.find("\033[01;34m"), std::string::npos); // dir = bold blue +} + +TEST(LsTest, ColorAutoOffWhenNotTty) { + TempDir tmp; + tmp.write_file("f.txt", ""); + auto dir = tmp.path.string(); + char a0[] = "ls", a1[] = "--color=auto", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", dir.c_str()); + char* argv[] = {a0, a1, a2}; + auto out = capture_stdout([&]{ 
return ls_main(3, argv); }); + EXPECT_EQ(out.find("\033["), std::string::npos); // capture_stdout is non-tty +} + +TEST(LsTest, ColorNeverExplicit) { + TempDir tmp; + std::filesystem::create_directory(tmp.path / "sub"); + auto dir = tmp.path.string(); + char a0[] = "ls", a1[] = "--color=never", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", dir.c_str()); + char* argv[] = {a0, a1, a2}; + auto out = capture_stdout([&]{ return ls_main(3, argv); }); + EXPECT_EQ(out.find("\033["), std::string::npos); +} + +TEST(LsTest, ColorInvalidValueExits2) { + TempDir tmp; + auto dir = tmp.path.string(); + char a0[] = "ls", a1[] = "--color=bogus", a2[256]; + std::snprintf(a2, sizeof(a2), "%s", dir.c_str()); + char* argv[] = {a0, a1, a2}; + EXPECT_EQ(ls_main(3, argv), 2); +} + #endif // CFBOX_ENABLE_LS From 69f88e6f1900ecf55e7a31bf17dcc41ef592337c Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:52:16 +0800 Subject: [PATCH 06/21] docs(plan): record ls recursive+color batch done --- document/ai/PLAN.md | 4 ++-- .../notes/2026-06-28-ls-recursive-color.md | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 document/notes/2026-06-28-ls-recursive-color.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 83fa192..417ad74 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批4 `ls -R` 递归 + `--color` 三态(固定色,LS_COLORS 延后)。 +> 焦点 → Phase 2 批5 `grep -A/-B/-C` 上下文窗口(ring 向前 + after_pending 向后 + 组间 `--`)。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build 
--output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -22,7 +22,7 @@ | 批1 | `tail -f/-F`(fd-based follow:fstat 轮询 + 64KiB quantum + -F drain-switch + SIGINT 退出 0) | ✅ | bff34e9 | 381/0 | | 批2 | `cp -a`(归档模式:保权限/属主/时间戳/symlink/递归) | ✅ | a3b89ed | 406/0 | | 批3 | `test` POSIX 子集(文件测试/字符串/整数/复合表达式,退出码语义) | ✅ | 0f9b3cb | 417/0 | -| 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ⏳ | — | — | +| 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ✅ | 6dfe329 | 424/0 | | 批5+ | grep -A/-B/-C、find 布尔表达式、sh 深化(按运维频率排) | ⏳ | — | — | > 各批细节(触及文件、Result 签名草案、完成门、gotcha)由 `/next <批>` 现场产出脚手架,确认后写入本表 commit/测试列。 diff --git a/document/notes/2026-06-28-ls-recursive-color.md b/document/notes/2026-06-28-ls-recursive-color.md new file mode 100644 index 0000000..3d9c10d --- /dev/null +++ b/document/notes/2026-06-28-ls-recursive-color.md @@ -0,0 +1,22 @@ +# 2026-06-28 — ls -R 递归 + --color(Phase 2 批4) + +## 背景 +ls 仅支持 -a/-l/-h,无递归、无着色(全仓零 ANSI)。顺带修一个潜伏格式 bug。 + +## 设计决策 +- **-R 递归 DFS**(`list_recursive`):GNU 风格分块——每个目录 `path:` 头 + 内容,块间空行。**不跟随 symlink 目录**(symlink_status 判定,规避环形 fs 无限递归)。 +- **`--color[=always|auto|never]`**:args.hpp **不注册** --color(避免 has_value 贪婪吃下一个 positional 当值),手动用 `has_long`/`get_long` 解析;bare `--color`=always。auto 由 `isatty(STDOUT)` 控制。 +- **固定 type→色映射**(dir=01;34 蓝 / exec=01;32 绿 / symlink=01;36 青 / fifo / socket / 设备=黄系)。LS_COLORS glob 解析**延后**(体积红线 + 收益低)。 +- **提取 `print_entry`/`collect_visible`**:消除原 list_directory 与 list_path 单文件的长格式重复(DRY)。 +- **修潜伏 bug**:`format_permissions` 返回 10 字符(含前导占位 `-`),调用方又 insert type_char → `ls -l` 输出 `d-rwxr-xr-x`(多一 `-`)。改为返回 9 字符、type_char 由调用方前缀 → `drwxr-xr-x`(coreutils 兼容)。 + +## 验证 +- GTest +7(RecursiveListsNestedDirs/DoesNotFollowSymlinkDir/PlusLong、ColorAlways/AutoOff/Never/InvalidValue),全量 **424/0**。 +- 集成 test_ls.sh +3(-R 递归、--color=always 出 ANSI、--color=auto 管道无 ANSI),10 passed。 +- size-opt **422 KB**(持平)。 + +## 陷阱 +- `capture_stdout`(test_capture.hpp)用 dup2 重定向到文件 → 
isatty=0 → auto 自动关色,既有 6 GTest 零回归;着色用例一律 `--color=always`。 +- --color 不注册到 args specs 是刻意为之;若将来 args.hpp 支持长选项可选值,可改回注册。 +- LS_COLORS 解析未做,--color 仅 type-based 固定色;用户自定义 `*.ext` 着色暂不支持(后置)。 +- commit: `6dfe329` From b6920c30210dfd9e9b41447431a98d123b5a2825 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:58:48 +0800 Subject: [PATCH 07/21] feat(grep): add -A/-B/-C context lines with group separators - -A/-B/-C NUM: trailing/leading/both context; -C sets A=B - leading context via a bounded ring buffer, trailing via after_pending - '--' separator between non-contiguous match groups - context lines honor -n line numbers and multi-file path: prefix - context disabled under -q/-l/-c (only counting/quiet apply) - default path (no -A/-B/-C) byte-identical to before --- src/applets/grep.cpp | 90 +++++++++++++++++++++++++++++----- tests/integration/test_grep.sh | 29 +++++++++++ tests/unit/test_grep.cpp | 76 +++++++++++++++++++++++++++- 3 files changed, 181 insertions(+), 14 deletions(-) diff --git a/src/applets/grep.cpp b/src/applets/grep.cpp index 637c17a..3c783d7 100644 --- a/src/applets/grep.cpp +++ b/src/applets/grep.cpp @@ -1,19 +1,19 @@ // grep — search patterns in text -// Supported flags: -E (extended regex), -i (ignore case), -v (invert match), -// -n (line numbers), -r (recursive), -c (count only), -// -l (files with matches), -q (quiet) +// Supported flags: -E -i -v -n -r -c -l -q, plus -A/-B/-C context lines. 
#include +#include #include #include #include +#include #include #include +#include #include #include #include -#include namespace { @@ -29,7 +29,10 @@ constexpr cfbox::help::HelpEntry HELP = { " -r recursive search\n" " -c print only a count of matching lines\n" " -l print only names of files with matches\n" - " -q quiet mode", + " -q quiet mode\n" + " -A NUM lines of trailing context\n" + " -B NUM lines of leading context\n" + " -C NUM lines of leading and trailing context", .extra = "", }; @@ -42,6 +45,8 @@ struct GrepOptions { bool count_only = false; bool files_with_matches = false; bool quiet = false; + int after = 0; + int before = 0; }; auto grep_file(const std::string& pattern, const GrepOptions& opts, @@ -59,6 +64,30 @@ auto grep_file(const std::string& pattern, const GrepOptions& opts, int found_any = 0; std::size_t line_num = 0; + const int after = opts.after; + const int before = opts.before; + const bool printing = !opts.quiet && !opts.files_with_matches && !opts.count_only; + + // Context-window state. before_buf holds up to `before` recent non-matching + // lines (flushed when a match prints them as leading context); after_pending + // counts lines still to emit after a match; need_separator + last_printed + // detect non-contiguous groups so a "--" separator is printed between them. 
+ std::vector> before_buf; + int after_pending = 0; + bool need_separator = false; + std::size_t last_printed = 0; + + auto emit = [&](std::size_t ln, const std::string& content) { + if (need_separator && ln != last_printed + 1) { + std::printf("--\n"); + } + if (print_filename) std::printf("%s:", path.data()); + if (opts.line_numbers) std::printf("%zu:", ln); + std::printf("%s\n", content.c_str()); + last_printed = ln; + need_separator = false; + }; + auto process_line = [&](const std::string& line) -> bool { ++line_num; bool matched = re.exec(line.c_str(), 0, nullptr, 0) == 0; @@ -67,20 +96,29 @@ auto grep_file(const std::string& pattern, const GrepOptions& opts, if (matched) { ++match_count; found_any = 1; - if (opts.quiet) return false; if (opts.files_with_matches) { std::printf("%s\n", path.data()); return false; } - if (!opts.count_only) { - if (print_filename) { - std::printf("%s:", path.data()); - } - if (opts.line_numbers) { - std::printf("%zu:", line_num); + if (opts.count_only) return true; + + // Leading context: flush buffered previous lines, then the match. + for (const auto& [ln, s] : before_buf) emit(ln, s); + before_buf.clear(); + emit(line_num, line); + after_pending = after; + } else if (printing) { + if (after_pending > 0) { + // Trailing context line after a match. + emit(line_num, line); + if (--after_pending == 0) need_separator = true; + } else if (before > 0) { + // Stash as potential leading context for a future match. 
+ before_buf.emplace_back(line_num, line); + if (before_buf.size() > static_cast(before)) { + before_buf.erase(before_buf.begin()); } - std::printf("%s\n", line.c_str()); } } return true; @@ -128,6 +166,9 @@ auto grep_main(int argc, char* argv[]) -> int { cfbox::args::OptSpec{'c', false, "count"}, cfbox::args::OptSpec{'l', false, "files-with-matches"}, cfbox::args::OptSpec{'q', false, "quiet"}, + cfbox::args::OptSpec{'A', true, "after-context"}, + cfbox::args::OptSpec{'B', true, "before-context"}, + cfbox::args::OptSpec{'C', true, "context"}, }); if (parsed.has_long("help")) { cfbox::help::print_help(HELP); return 0; } @@ -143,6 +184,29 @@ auto grep_main(int argc, char* argv[]) -> int { opts.files_with_matches = parsed.has('l'); opts.quiet = parsed.has('q'); + // Context counts: -A/-B/-C take a non-negative integer; -C sets both. + bool bad_num = false; + auto take_num = [&](char f, const char* ln) -> int { + auto v = parsed.get_any(f, ln); + if (!v) return -1; // not specified + std::string s{*v}; + char* end = nullptr; + long n = std::strtol(s.c_str(), &end, 10); + if (s.empty() || *end != '\0' || n < 0) { + CFBOX_ERR("grep", "invalid context count for -%c: '%s'", f, s.c_str()); + bad_num = true; + return 0; + } + return static_cast(n); + }; + int a = take_num('A', "after-context"); + int b = take_num('B', "before-context"); + int c = take_num('C', "context"); + if (bad_num) return 2; + if (c >= 0) { a = c; b = c; } + opts.after = (a >= 0) ? a : 0; + opts.before = (b >= 0) ? 
b : 0; + const auto& pos = parsed.positional(); if (pos.empty()) { CFBOX_ERR("grep", "missing pattern"); diff --git a/tests/integration/test_grep.sh b/tests/integration/test_grep.sh index 1191ad4..5b69d19 100755 --- a/tests/integration/test_grep.sh +++ b/tests/integration/test_grep.sh @@ -88,5 +88,34 @@ else ((++fail)) fi +# -A after-context (with -- separator between non-contiguous groups) +run_test "after_context" "hello world +foo bar +-- +hello again" -A1 "hello" "$tmpdir/grep1.txt" + +# -B before-context +run_test "before_context" "hello world +baz qux +hello again" -B1 "hello" "$tmpdir/grep1.txt" + +# -C context both +run_test "context_both" "hello world +foo bar +baz qux +hello again" -C1 "hello" "$tmpdir/grep1.txt" + +# invalid -A value -> exit 2 +set +e +"$CFBOX" grep -A abc "hello" "$tmpdir/grep1.txt" 2>/dev/null +rc=$? +set -e +if [[ "$rc" -eq 2 ]]; then + ((++pass)) +else + echo "FAIL [invalid_after]: expected exit 2, got $rc" + ((++fail)) +fi + echo "grep: $pass passed, $fail failed" [[ $fail -eq 0 ]] diff --git a/tests/unit/test_grep.cpp b/tests/unit/test_grep.cpp index 7a2a5d0..6746f77 100644 --- a/tests/unit/test_grep.cpp +++ b/tests/unit/test_grep.cpp @@ -1,7 +1,9 @@ +#include + +#include #include #include #include "test_capture.hpp" -#include #if CFBOX_ENABLE_GREP @@ -103,4 +105,76 @@ TEST(GrepTest, MissingPattern) { EXPECT_EQ(rc, 2); } +// --- -A/-B/-C context windows --- +TEST(GrepTest, ContextAfter) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "match\nx\nmatch\n"); + char a0[] = "grep", a1[] = "-A1", a2[] = "match", a3[256]; + std::snprintf(a3, sizeof(a3), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3}; + auto out = capture_stdout([&]{ return grep_main(4, argv); }); + EXPECT_EQ(out, "match\nx\nmatch\n"); +} + +TEST(GrepTest, ContextBefore) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "x\nmatch\n"); + char a0[] = "grep", a1[] = "-B1", a2[] = "match", a3[256]; + std::snprintf(a3, sizeof(a3), "%s", f.c_str()); + char* argv[] = 
{a0, a1, a2, a3}; + auto out = capture_stdout([&]{ return grep_main(4, argv); }); + EXPECT_EQ(out, "x\nmatch\n"); +} + +TEST(GrepTest, ContextBoth) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "x\nmatch\ny\n"); + char a0[] = "grep", a1[] = "-C1", a2[] = "match", a3[256]; + std::snprintf(a3, sizeof(a3), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3}; + auto out = capture_stdout([&]{ return grep_main(4, argv); }); + EXPECT_EQ(out, "x\nmatch\ny\n"); +} + +TEST(GrepTest, ContextSeparatorBetweenGroups) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "match\na\nb\nmatch\n"); + char a0[] = "grep", a1[] = "-A1", a2[] = "match", a3[256]; + std::snprintf(a3, sizeof(a3), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3}; + auto out = capture_stdout([&]{ return grep_main(4, argv); }); + EXPECT_EQ(out, "match\na\n--\nmatch\n"); +} + +TEST(GrepTest, ContextZeroIsPlainGrep) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "match\nx\nmatch\n"); + char a0[] = "grep", a1[] = "-A0", a2[] = "match", a3[256]; + std::snprintf(a3, sizeof(a3), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3}; + auto out = capture_stdout([&]{ return grep_main(4, argv); }); + EXPECT_EQ(out, "match\nmatch\n"); +} + +TEST(GrepTest, ContextWithLineNumbers) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "match\nx\nmatch\n"); + char a0[] = "grep", a1[] = "-n", a2[] = "-A1", a3[] = "match", a4[256]; + std::snprintf(a4, sizeof(a4), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3, a4}; + auto out = capture_stdout([&]{ return grep_main(5, argv); }); + EXPECT_EQ(out, "1:match\n2:x\n3:match\n"); +} + +TEST(GrepTest, ContextInvalidNumberExits2) { + TempDir tmp; + auto f = tmp.write_file("d.txt", "match\n"); + char a0[] = "grep", a1[] = "-A", a2[] = "abc", a3[] = "match", a4[256]; + std::snprintf(a4, sizeof(a4), "%s", f.c_str()); + char* argv[] = {a0, a1, a2, a3, a4}; + int rc = 0; + capture_stdout([&]{ rc = grep_main(5, argv); return 0; }); + EXPECT_EQ(rc, 2); +} + #endif // 
CFBOX_ENABLE_GREP From 80c7bd40d4f98f8ca330ccd509ed8ee4ee4b34e4 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 12:59:58 +0800 Subject: [PATCH 08/21] docs(plan): record grep context batch done --- document/ai/PLAN.md | 6 ++++-- document/notes/2026-06-28-grep-context.md | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 document/notes/2026-06-28-grep-context.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 417ad74..4573838 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批5 `grep -A/-B/-C` 上下文窗口(ring 向前 + after_pending 向后 + 组间 `--`)。 +> 焦点 → Phase 2 批5b `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降,仿 expr.cpp)。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -23,7 +23,9 @@ | 批2 | `cp -a`(归档模式:保权限/属主/时间戳/symlink/递归) | ✅ | a3b89ed | 406/0 | | 批3 | `test` POSIX 子集(文件测试/字符串/整数/复合表达式,退出码语义) | ✅ | 0f9b3cb | 417/0 | | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ✅ | 6dfe329 | 424/0 | -| 批5+ | grep -A/-B/-C、find 布尔表达式、sh 深化(按运维频率排) | ⏳ | — | — | +| 批5a | `grep -A/-B/-C` 上下文(ring 向前 + after_pending 向后 + 组间 `--`) | ✅ | b6920c3 | 431/0 | +| 批5b | `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降) | 🔄 NEXT | — | — | +| 批5c | `sh` 深化(case/`$((expr))`/函数/here-doc + read增强/break N/高级`${}`/trap) | ⏳ | — | — | > 各批细节(触及文件、Result 签名草案、完成门、gotcha)由 `/next <批>` 现场产出脚手架,确认后写入本表 commit/测试列。 diff --git 
a/document/notes/2026-06-28-grep-context.md b/document/notes/2026-06-28-grep-context.md new file mode 100644 index 0000000..a3f3b6d --- /dev/null +++ b/document/notes/2026-06-28-grep-context.md @@ -0,0 +1,23 @@ +# 2026-06-28 — grep -A/-B/-C 上下文窗口(Phase 2 批5a) + +## 背景 +grep 仅即时打印匹配行,无上下文。补 GNU `-A/-B/-C`,核心难点是流式(for_each_line 逐行 fgetc,不可回卷)下的"向前看"与组间分隔。 + +## 设计决策 +- **向前上下文(-B)**:`before_buf` 为容量 B 的 ring(`vector>`,超容量 erase front)。非匹配、非 after 的行入缓冲;匹配时先 flush 缓冲作为前导上下文。 +- **向后上下文(-A)**:匹配后置 `after_pending=A`,后续非匹配行只要 after_pending>0 就 emit 并递减;连续匹配重置 after_pending(合并块)。 +- **组间 `--`**:`need_separator`(一块 after 耗尽时置位)+ `last_printed`(连续检测)。emit 前若 `need_separator && ln != last_printed+1` 打 `--`。正确处理"连续块合并不打 --"与"尾部不打 --"。 +- **前缀一致性**:`emit` lambda 统一 `path:` + `line_num:` + content,上下文行与匹配行同格式。 +- **互斥语义**:`-q/-l/-c` 下上下文不打印(`printing = !quiet && !files_with_matches && !count_only`),但 match_count 仍计数。 +- **零回归**:默认(无 -A/-B/-C)after=before=0,不进缓冲逻辑,process_line 走原匹配即打印路径,**字节级不变**。 + +## 验证 +- GTest +7(ContextAfter/Before/Both/Separator/Zero/WithLineNumbers/InvalidNumber),全量 **431/0**。 +- 集成 test_grep.sh +4(-A 含 `--`、-B、-C、invalid -A 退 2),16 passed。 +- size-opt **422 KB**(持平——vector 复用既有模板,无新膨胀)。 + +## 陷阱 +- `-C` 覆盖 `-A/-B`(后指定优先,符合 GNU)。 +- ring 用 vector erase front 是 O(n),但 B 通常小;未用 deque 避免模板膨胀。 +- 多文件上下文:每个文件独立 grep_file,`--` 不跨文件(GNU 跨文件也用 `--`,但本实现 per-file;多文件 -A 场景罕见,可接受)。 +- commit: `b6920c3` From 3e29feb9eea33448f4308bf76e97d260c2f4d020 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 13:11:22 +0800 Subject: [PATCH 09/21] feat(find): boolean expressions with recursive-descent parser - expression AST replaces flat AND chain: -a (implicit), -o, ! 
/ -not, ( ) - recursive-descent parser with correct precedence (AND binds tighter than OR) - -maxdepth hoisted to a global option; -exec stays an action leaf - unknown primary / unbalanced paren / missing operand -> exit 1 - PATH vs expression disambiguation (leading '(' or '!' starts an expression) --- src/applets/find.cpp | 363 ++++++++++++++++++--------------- tests/integration/test_find.sh | 35 ++++ tests/unit/test_find.cpp | 71 ++++++- 3 files changed, 304 insertions(+), 165 deletions(-) diff --git a/src/applets/find.cpp b/src/applets/find.cpp index e8d0bb0..81c4978 100644 --- a/src/applets/find.cpp +++ b/src/applets/find.cpp @@ -1,21 +1,18 @@ // find — search for files in a directory hierarchy -// Supported predicates: -name PATTERN, -type [f|d|l], -maxdepth N, -exec CMD {} ; -// Known differences from GNU find: no -iname, -path, -perm, -mtime, -newer, -// -delete, -print0, or complex boolean expressions. -name uses fnmatch-style glob. +// Predicates: -name PATTERN, -type [f|d|l|b|c|p|s], -maxdepth N, -exec CMD {} ; +// Boolean: -a (implicit), -o, ! / -not, ( ) grouping. #include #include #include -#include #include #include -#include #include #include #include -#include #include +#include namespace { @@ -25,34 +22,26 @@ constexpr cfbox::help::HelpEntry HELP = { .one_line = "search for files in a directory hierarchy", .usage = "find [PATH] [PREDICATE]...", .options = " Predicates:\n" - " -name PATTERN match filename (glob)\n" - " -type [f|d|l] match file type\n" - " -maxdepth N descend at most N levels\n" - " -exec CMD {} ; execute command on matches", + " -name PATTERN match filename (glob * and ?)\n" + " -type [f|d|l|b|c|p|s] match file type\n" + " -maxdepth N descend at most N levels (global option)\n" + " -exec CMD {} ; execute command on matches\n" + " Operators (descending precedence):\n" + " -o logical OR -a / implicit logical AND\n" + " ! / -not logical NOT ( ) grouping", .extra = "", }; -// Simple fnmatch-style glob matching: supports * ? 
and literal chars +// fnmatch-style glob: supports * ? and literal chars. auto glob_match(std::string_view pattern, std::string_view text) -> bool { std::size_t pi = 0, ti = 0; std::size_t star_pi = std::string_view::npos, star_ti = std::string_view::npos; while (ti < text.size()) { if (pi < pattern.size()) { - if (pattern[pi] == '?') { - ++pi; ++ti; - continue; - } - if (pattern[pi] == '*') { - star_pi = pi; - star_ti = ti; - ++pi; - continue; - } - if (pattern[pi] == text[ti]) { - ++pi; ++ti; - continue; - } + if (pattern[pi] == '?') { ++pi; ++ti; continue; } + if (pattern[pi] == '*') { star_pi = pi; star_ti = ti; ++pi; continue; } + if (pattern[pi] == text[ti]) { ++pi; ++ti; continue; } } if (star_pi != std::string_view::npos) { pi = star_pi + 1; @@ -66,54 +55,6 @@ auto glob_match(std::string_view pattern, std::string_view text) -> bool { return pi == pattern.size(); } -struct Predicate { - enum Type { Name, TypeMatch, MaxDepth, Exec }; - Type type = Name; - std::string value; // for -name pattern, -type char - int numeric = 0; // for -maxdepth - std::vector exec_cmd; // for -exec -}; - -auto parse_predicates(int argc, char* argv[], int start) -> std::vector { - std::vector preds; - int i = start; - while (i < argc) { - std::string_view arg{argv[i]}; - if (arg == "-name" && i + 1 < argc) { - preds.push_back({}); - preds.back().type = Predicate::Name; - preds.back().value = argv[i + 1]; - i += 2; - } else if (arg == "-type" && i + 1 < argc) { - preds.push_back({}); - preds.back().type = Predicate::TypeMatch; - preds.back().value = argv[i + 1]; - i += 2; - } else if (arg == "-maxdepth" && i + 1 < argc) { - preds.push_back({}); - preds.back().type = Predicate::MaxDepth; - preds.back().numeric = std::atoi(argv[i + 1]); - i += 2; - } else if (arg == "-exec") { - ++i; - Predicate p; - p.type = Predicate::Exec; - while (i < argc) { - std::string_view token{argv[i]}; - if (token == ";" || token == "\\;") break; - p.exec_cmd.push_back(std::string{token}); - ++i; - } - // 
skip the terminating ; - if (i < argc) ++i; - preds.push_back(std::move(p)); - } else { - ++i; - } - } - return preds; -} - auto file_type_char(const std::filesystem::directory_entry& entry) -> char { auto st = entry.symlink_status(); switch (st.type()) { @@ -128,51 +69,18 @@ auto file_type_char(const std::filesystem::directory_entry& entry) -> char { } } -auto matches_predicates(const std::filesystem::directory_entry& entry, - const std::vector& preds, int depth) -> bool { - for (const auto& p : preds) { - switch (p.type) { - case Predicate::Name: { - auto fname = entry.path().filename().string(); - if (!glob_match(p.value, fname)) return false; - break; - } - case Predicate::TypeMatch: { - if (p.value.size() != 1) return false; - char tc = file_type_char(entry); - if (tc != p.value[0]) return false; - break; - } - case Predicate::MaxDepth: { - if (depth > p.numeric) return false; - break; - } - case Predicate::Exec: - break; // exec is handled in action, not filtering - } - } - return true; -} - auto run_exec(const std::vector& cmd_template, const std::string& filepath) -> void { std::vector args; args.reserve(cmd_template.size()); for (const auto& part : cmd_template) { - if (part == "{}") { - args.push_back(filepath); - } else { - args.push_back(part); - } + args.push_back(part == "{}" ? filepath : part); } - // build argv std::vector argv_arr; argv_arr.reserve(args.size() + 1); for (auto& a : args) argv_arr.push_back(a.data()); argv_arr.push_back(nullptr); - // fork-like: just system() with proper quoting isn't great, - // so use execvp via fork pid_t pid = fork(); if (pid == 0) { execvp(argv_arr[0], argv_arr.data()); @@ -183,96 +91,223 @@ auto run_exec(const std::vector& cmd_template, } } -auto do_find(const std::filesystem::path& root, const std::vector& preds) -> int { - bool has_exec = false; - for (const auto& p : preds) { - if (p.type == Predicate::Exec) { has_exec = true; break; } +// Expression tree. 
And/Or hold N children; Not holds 1; Name/Type/Exec are +// leaves; True is a no-op (used for the empty expression and -maxdepth, which +// is a global option that does not participate in evaluation). +struct Node { + enum Kind { And, Or, Not, Name, Type, Exec, True }; + Kind kind = True; + std::string value; // Name pattern / Type char + std::vector exec_cmd; // Exec command template + std::vector children; // And/Or/Not sub-expressions + + Node() = default; + explicit Node(Kind k) : kind(k) {} +}; + +auto eval(const Node& n, const std::filesystem::directory_entry& entry) -> bool { + switch (n.kind) { + case Node::True: + return true; + case Node::Name: + return glob_match(n.value, entry.path().filename().string()); + case Node::Type: + return n.value.size() == 1 && file_type_char(entry) == n.value[0]; + case Node::Exec: + run_exec(n.exec_cmd, entry.path().string()); + return true; // actions always evaluate true + case Node::Not: + return !eval(n.children[0], entry); + case Node::And: + for (const auto& c : n.children) + if (!eval(c, entry)) return false; + return true; + case Node::Or: + for (const auto& c : n.children) + if (eval(c, entry)) return true; + return false; + } + return false; +} + +auto has_exec_node(const Node& n) -> bool { + if (n.kind == Node::Exec) return true; + for (const auto& c : n.children) + if (has_exec_node(c)) return true; + return false; +} + +// Recursive-descent parser over the predicate token stream. Sets `failed` on a +// syntax error (unknown predicate, missing operand, unbalanced paren). 
+struct Parser { + const std::vector& toks; + int maxdepth = -1; // global -maxdepth (out) + std::size_t pos = 0; + bool failed = false; + + [[nodiscard]] auto at_end() const -> bool { return pos >= toks.size(); } + auto peek() const -> const std::string& { return toks[pos]; } + + auto parse_or() -> Node { + Node left = parse_and(); + if (failed || at_end() || peek() != "-o") return left; + Node node{Node::Or}; + node.children.push_back(std::move(left)); + while (!failed && !at_end() && peek() == "-o") { + ++pos; + node.children.push_back(parse_and()); + } + return node; + } + + auto parse_and() -> Node { + Node left = parse_not(); + if (failed || at_end() || peek() == "-o" || peek() == ")") return left; + Node node{Node::And}; + node.children.push_back(std::move(left)); + while (!failed && !at_end() && peek() != "-o" && peek() != ")") { + if (peek() == "-a") ++pos; // explicit; otherwise implicit AND + node.children.push_back(parse_not()); + } + return node; } + auto parse_not() -> Node { + if (!at_end() && (peek() == "!" 
|| peek() == "-not")) { + ++pos; + Node node{Node::Not}; + node.children.push_back(parse_not()); + return node; + } + return parse_primary(); + } + + auto parse_primary() -> Node { + if (at_end()) { failed = true; return Node{Node::True}; } + const std::string& t = peek(); + + if (t == "(") { + ++pos; + Node inner = parse_or(); + if (failed) return inner; + if (at_end() || peek() != ")") { failed = true; return inner; } + ++pos; + return inner; + } + if (t == "-name") { + ++pos; + if (at_end()) { failed = true; return Node{Node::True}; } + Node n{Node::Name}; + n.value = peek(); + ++pos; + return n; + } + if (t == "-type") { + ++pos; + if (at_end()) { failed = true; return Node{Node::True}; } + Node n{Node::Type}; + n.value = peek(); + ++pos; + return n; + } + if (t == "-maxdepth") { + ++pos; + if (at_end()) { failed = true; return Node{Node::True}; } + maxdepth = std::atoi(peek().c_str()); + ++pos; + return Node{Node::True}; // global option: no-op in the tree + } + if (t == "-exec") { + ++pos; + Node n{Node::Exec}; + while (!at_end() && peek() != ";" && peek() != "\\;") { + n.exec_cmd.push_back(peek()); + ++pos; + } + if (!at_end()) ++pos; // consume the terminating ; + return n; + } + + failed = true; + CFBOX_ERR("find", "unknown primary or operator '%s'", t.c_str()); + return Node{Node::True}; + } +}; + +auto do_find(const std::filesystem::path& root, const Node& expr, int maxdepth) -> int { + const bool print = !has_exec_node(expr); + std::error_code ec; - auto it = std::filesystem::recursive_directory_iterator(root, - std::filesystem::directory_options::follow_directory_symlink, ec); + auto it = std::filesystem::recursive_directory_iterator( + root, std::filesystem::directory_options::follow_directory_symlink, ec); if (ec) { CFBOX_ERR("find", "'%s': %s", root.string().c_str(), ec.message().c_str()); return 1; } - int rc = 0; - // Check the root itself + auto emit = [&](const std::filesystem::directory_entry& entry, int depth) { + if (maxdepth >= 0 && depth > 
maxdepth) return; + if (eval(expr, entry) && print) { + std::printf("%s\n", entry.path().string().c_str()); + } + // When -exec is present, eval() already ran it as a side effect; the + // default -print is suppressed. + }; + + // Evaluate the root entry itself (depth 0). { std::error_code ec2; std::filesystem::directory_entry root_entry(root, ec2); - if (!ec2 && matches_predicates(root_entry, preds, 0)) { - if (has_exec) { - for (const auto& p : preds) { - if (p.type == Predicate::Exec) { - run_exec(p.exec_cmd, root_entry.path().string()); - } - } - } else { - std::printf("%s\n", root_entry.path().string().c_str()); - } - } + if (!ec2) emit(root_entry, 0); } + int rc = 0; for (const auto& entry : it) { - int depth = it.depth() + 1; // +1 because root is depth 0 - - // Check maxdepth predicate — disable recursion if we'd exceed it - bool exceeded = false; - for (const auto& p : preds) { - if (p.type == Predicate::MaxDepth && depth > p.numeric) { - exceeded = true; - it.disable_recursion_pending(); - break; - } - } - // Even at maxdepth+1 we might want to print the name - // but standard find doesn't list beyond maxdepth, so skip - if (exceeded) continue; - - if (matches_predicates(entry, preds, depth)) { - if (has_exec) { - for (const auto& p : preds) { - if (p.type == Predicate::Exec) { - run_exec(p.exec_cmd, entry.path().string()); - } - } - } else { - std::printf("%s\n", entry.path().string().c_str()); - } + int depth = it.depth() + 1; // root is depth 0 + if (maxdepth >= 0 && depth > maxdepth) { + it.disable_recursion_pending(); + continue; } + emit(entry, depth); } - return rc; } } // namespace auto find_main(int argc, char* argv[]) -> int { - // Handle --help/--version before any other processing for (int i = 1; i < argc; ++i) { std::string_view arg{argv[i]}; if (arg == "--help") { cfbox::help::print_help(HELP); return 0; } if (arg == "--version") { cfbox::help::print_version(HELP); return 0; } } - if (argc < 2) { - // default: find . 
- return do_find(".", {}); - } - - // First non-option argument is the path (if it doesn't start with '-') - // Otherwise default to "." + // First non-option argument is the PATH, unless it begins an expression + // (a predicate, '(', ')', or '!'). Default root is ".". int start = 1; std::filesystem::path root = "."; - - if (argv[1][0] != '-') { + auto is_expr_start = [](const char* s) -> bool { + return s[0] == '-' || s[0] == '(' || s[0] == ')' || + (s[0] == '!' && s[1] == '\0'); + }; + if (argc > 1 && !is_expr_start(argv[1])) { root = argv[1]; start = 2; } - auto preds = parse_predicates(argc, argv, start); + std::vector tokens; + for (int i = start; i < argc; ++i) tokens.emplace_back(argv[i]); + + // Empty expression prints everything (root + descendants). + if (tokens.empty()) { + return do_find(root, Node{Node::True}, -1); + } + + Parser parser{tokens}; + Node expr = parser.parse_or(); + if (parser.failed || !parser.at_end()) { + return 1; // GNU find: invalid expression -> exit 1 + } - return do_find(root, preds); + return do_find(root, expr, parser.maxdepth); } diff --git a/tests/integration/test_find.sh b/tests/integration/test_find.sh index 1626895..0c6708e 100755 --- a/tests/integration/test_find.sh +++ b/tests/integration/test_find.sh @@ -89,6 +89,41 @@ run_test "name_and_type" "$tmpdir/file1.txt $tmpdir/subdir/nested.txt $tmpdir/subdir/deep/deep.txt" "$tmpdir" -name "*.txt" -type f +# -o (OR): txt or cpp files +run_test "or" "$tmpdir/file1.txt +$tmpdir/file2.cpp +$tmpdir/subdir/nested.txt +$tmpdir/subdir/deep/deep.txt" "$tmpdir" -name "*.txt" -o -name "*.cpp" + +# ! -type f (NOT): dirs and symlinks, exclude regular files +run_test "not_type_f" "$tmpdir +$tmpdir/link1 +$tmpdir/subdir +$tmpdir/subdir/deep" "$tmpdir" ! -type f + +# ( ... 
) grouping +run_test "parens" "$tmpdir/file1.txt +$tmpdir/file2.cpp +$tmpdir/subdir/nested.txt +$tmpdir/subdir/deep/deep.txt" "$tmpdir" \( -name "*.txt" -o -name "*.cpp" \) + +# explicit -a (AND) +run_test "explicit_and" "$tmpdir/file1.txt +$tmpdir/subdir/nested.txt +$tmpdir/subdir/deep/deep.txt" "$tmpdir" -name "*.txt" -a -type f + +# unknown predicate -> exit 1 +set +e +"$CFBOX" find "$tmpdir" -badop 2>/dev/null +rc=$? +set -e +if [[ "$rc" -eq 1 ]]; then + ((++pass)) +else + echo "FAIL [unknown_pred]: expected exit 1, got $rc" + ((++fail)) +fi + # -exec echo result=$("$CFBOX" find "$tmpdir" -name "file1.txt" -exec echo "found" ";" 2>/dev/null) count=$(echo "$result" | grep -c "found" || true) diff --git a/tests/unit/test_find.cpp b/tests/unit/test_find.cpp index cbdc7f0..d54fc9a 100644 --- a/tests/unit/test_find.cpp +++ b/tests/unit/test_find.cpp @@ -1,7 +1,10 @@ +#include +#include + +#include #include #include #include "test_capture.hpp" -#include #if CFBOX_ENABLE_FIND @@ -88,4 +91,70 @@ TEST(FindTest, NameAndTypeCombined) { EXPECT_EQ(out.find("docs.txt"), std::string::npos); } +// --- boolean operators --- +TEST(FindTest, FindByOr) { + TempDir tmp; + tmp.write_file("a.txt", ""); + tmp.write_file("b.cpp", ""); + tmp.write_file("c.md", ""); + char a0[] = "find", a1[256], a2[] = "-name", a3[] = "*.txt", + a4[] = "-o", a5[] = "-name", a6[] = "*.cpp"; + std::snprintf(a1, sizeof(a1), "%s", tmp.path.string().c_str()); + char* argv[] = {a0, a1, a2, a3, a4, a5, a6}; + auto out = capture_stdout([&]{ return find_main(7, argv); }); + EXPECT_NE(out.find("a.txt"), std::string::npos); + EXPECT_NE(out.find("b.cpp"), std::string::npos); + EXPECT_EQ(out.find("c.md"), std::string::npos); +} + +TEST(FindTest, FindByNotType) { + TempDir tmp; + tmp.write_file("file.txt", ""); + std::filesystem::create_directory(tmp.path / "dir"); + char a0[] = "find", a1[256], a2[] = "!", a3[] = "-type", a4[] = "f"; + std::snprintf(a1, sizeof(a1), "%s", tmp.path.string().c_str()); + char* argv[] = 
{a0, a1, a2, a3, a4}; + auto out = capture_stdout([&]{ return find_main(5, argv); }); + EXPECT_EQ(out.find("file.txt"), std::string::npos); // excluded by !-type f + EXPECT_NE(out.find("dir"), std::string::npos); +} + +TEST(FindTest, ExplicitAnd) { + TempDir tmp; + tmp.write_file("match.txt", ""); + tmp.write_file("nomatch.cpp", ""); + char a0[] = "find", a1[256], a2[] = "-name", a3[] = "*.txt", + a4[] = "-a", a5[] = "-type", a6[] = "f"; + std::snprintf(a1, sizeof(a1), "%s", tmp.path.string().c_str()); + char* argv[] = {a0, a1, a2, a3, a4, a5, a6}; + auto out = capture_stdout([&]{ return find_main(7, argv); }); + EXPECT_NE(out.find("match.txt"), std::string::npos); + EXPECT_EQ(out.find("nomatch.cpp"), std::string::npos); +} + +TEST(FindTest, ParensGrouping) { + TempDir tmp; + tmp.write_file("x.txt", ""); + tmp.write_file("y.cpp", ""); + tmp.write_file("z.md", ""); + char a0[] = "find", a1[256], a2[] = "(", a3[] = "-name", a4[] = "*.txt", + a5[] = "-o", a6[] = "-name", a7[] = "*.cpp", a8[] = ")"; + std::snprintf(a1, sizeof(a1), "%s", tmp.path.string().c_str()); + char* argv[] = {a0, a1, a2, a3, a4, a5, a6, a7, a8}; + auto out = capture_stdout([&]{ return find_main(9, argv); }); + EXPECT_NE(out.find("x.txt"), std::string::npos); + EXPECT_NE(out.find("y.cpp"), std::string::npos); + EXPECT_EQ(out.find("z.md"), std::string::npos); +} + +TEST(FindTest, UnknownPredicateExits1) { + TempDir tmp; + char a0[] = "find", a1[256], a2[] = "-badop"; + std::snprintf(a1, sizeof(a1), "%s", tmp.path.string().c_str()); + char* argv[] = {a0, a1, a2}; + int rc = 0; + capture_stdout([&]{ rc = find_main(3, argv); return 0; }); + EXPECT_EQ(rc, 1); +} + #endif // CFBOX_ENABLE_FIND From ee7c5889c3246c62708b56b5ef99142f6a15cc62 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 13:12:07 +0800 Subject: [PATCH 10/21] docs(plan): record find boolean batch done --- document/ai/PLAN.md | 6 +++--- document/notes/2026-06-28-find-boolean.md | 24 
+++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 document/notes/2026-06-28-find-boolean.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 4573838..c17ce53 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批5b `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降,仿 expr.cpp)。 +> 焦点 → Phase 2 批5c `sh` 全收(case/`$((expr))`/函数/here-doc + read增强/break N/高级`${}`/trap)。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -24,8 +24,8 @@ | 批3 | `test` POSIX 子集(文件测试/字符串/整数/复合表达式,退出码语义) | ✅ | 0f9b3cb | 417/0 | | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ✅ | 6dfe329 | 424/0 | | 批5a | `grep -A/-B/-C` 上下文(ring 向前 + after_pending 向后 + 组间 `--`) | ✅ | b6920c3 | 431/0 | -| 批5b | `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降) | 🔄 NEXT | — | — | -| 批5c | `sh` 深化(case/`$((expr))`/函数/here-doc + read增强/break N/高级`${}`/trap) | ⏳ | — | — | +| 批5b | `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降) | ✅ | 3e29feb | 436/0 | +| 批5c | `sh` 深化(case/`$((expr))`/函数/here-doc + read增强/break N/高级`${}`/trap) | 🔄 NEXT | — | — | > 各批细节(触及文件、Result 签名草案、完成门、gotcha)由 `/next <批>` 现场产出脚手架,确认后写入本表 commit/测试列。 diff --git a/document/notes/2026-06-28-find-boolean.md b/document/notes/2026-06-28-find-boolean.md new file mode 100644 index 0000000..ed04c51 --- /dev/null +++ b/document/notes/2026-06-28-find-boolean.md @@ -0,0 +1,24 @@ +# 2026-06-28 — find 布尔表达式(Phase 2 批5b) + +## 背景 +find 是扁平 AND 谓词链(`Predicate` enum + 
`matches_predicates` 全真折叠),无 -o/!/()/-not,未知谓词静默跳过。改为表达式树 + 递归下降。 + +## 设计决策 +- **AST**(`Node{And,Or,Not,Name,Type,Exec,True}`):And/Or 持 N 子;Not 持 1 子;Name/Type/Exec/True 叶子。`Node(Kind)` 构造函数(非聚合)规避 designated-init 在 -Werror 下的诊断差异。 +- **递归下降**(`Parser` + token 游标,仿 expr.cpp):`parse_or → parse_and`(隐式 AND:相邻 primary 自动 AND)`→ parse_not → parse_primary`。优先级 AND 高于 OR(`-name a -o -name b -type f` = `a OR (b AND f)`)。 +- **-maxdepth 提为 global option**:parse 遇 `-maxdepth N` 设 `parser.maxdepth`,返回 `True` 占位(不进求值树,作用于遍历 `disable_recursion_pending`)。 +- **-exec 作 action 叶子**:eval 时执行 + 返回 true;短路求值下 `-name x -o -exec ...` 的 exec 在 OR 左真时不执行(符合 GNU)。 +- **PATH vs 表达式消歧**:argv[1] 若以 `-`/`(`/`)` 开头或为 `!`,则是表达式(PATH 默认 `.`),否则是 PATH。 +- **错误处理**:未知谓词/缺操作数/括号不匹配 → `failed=true` → exit 1(GNU find 语义)。 + +## 验证 +- GTest +5(FindByOr/FindByNotType/ExplicitAnd/ParensGrouping/UnknownPredicateExits1),全量 **436/0**。 +- 集成 test_find.sh +5(-o、! -type f、\( \)、显式 -a、未知谓词 exit 1),16 passed。 +- size-opt **422 KB**(持平——AST 用现有 vector,无新模板膨胀)。 + +## 陷阱 +- 隐式 AND:`-name x -type f` 无 `-a` 也按 AND(GNU 行为);`parse_and` 循环 `peek != "-o" && peek != ")"` 触发。 +- `-exec` 与默认 `-print`:表达式含 Exec 节点时抑制默认打印(GNU);混合 `-exec ... -print` 显式 print 未实现(后置)。 +- 括号在 shell 需转义 `\(` `\)`,经 argv 传入为 `(` `)`(解析器认两种)。 +- 未补 -iname/-path/-perm/-mtime/-newer/-prune/-delete(POSIX 子集外,后置)。 +- commit: `3e29feb` From 40d48dc7bd381cca4d289a64f3872780abfd355f Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 13:25:07 +0800 Subject: [PATCH 11/21] feat(sh): arithmetic expansion and assignment RHS expansion - $((expr)) recursive-descent evaluator: + - * / %, comparisons, && || !, parens, integer literals, variables (bare or $VAR; unset/non-numeric -> 0) - fix assignment RHS not being expanded (X=$((i+1)) now works; was stored raw) - integration +5 arithmetic cases incl. 
a real counting while loop --- src/applets/sh/sh_executor.cpp | 11 ++- src/applets/sh/sh_expand.cpp | 156 +++++++++++++++++++++++++++++++++ tests/integration/test_sh.sh | 22 +++++ 3 files changed, 187 insertions(+), 2 deletions(-) diff --git a/src/applets/sh/sh_executor.cpp b/src/applets/sh/sh_executor.cpp index 4a4da38..39d2dc9 100644 --- a/src/applets/sh/sh_executor.cpp +++ b/src/applets/sh/sh_executor.cpp @@ -78,9 +78,16 @@ static auto restore_redirections(std::vector>& saved) -> voi static auto execute_pipeline(Pipeline& node, ShellState& state) -> int; static auto execute_simple(SimpleCommand& cmd, ShellState& state) -> int { - // Apply assignments + // Apply assignments. The RHS is expanded (param/arith/command sub); parts + // are joined back into one value since assignment does not field-split. for (auto& [name, value] : cmd.assigns) { - state.set_var(name, value); + auto parts = expand_word(value, state); + std::string joined; + for (std::size_t k = 0; k < parts.size(); ++k) { + if (k > 0) joined += ' '; + joined += parts[k]; + } + state.set_var(name, joined); } if (cmd.words.empty()) { diff --git a/src/applets/sh/sh_expand.cpp b/src/applets/sh/sh_expand.cpp index a6c1fdf..bffd814 100644 --- a/src/applets/sh/sh_expand.cpp +++ b/src/applets/sh/sh_expand.cpp @@ -1,10 +1,13 @@ #include "sh.hpp" +#include #include #include #include #include +#include + namespace { struct PipeCloser { @@ -14,6 +17,134 @@ struct PipeCloser { }; using unique_pipe = std::unique_ptr; +// Recursive-descent evaluator for $((expr)). Subset: + - * / %, comparisons, +// && || !, unary +/-, parentheses, integer literals, and variables (bare names +// or $VAR; empty/unset/non-numeric -> 0, matching shell semantics). 
+struct ArithEvaluator { + std::string_view expr; + std::size_t pos = 0; + const cfbox::sh::ShellState& st; + bool error = false; + + void skip_ws() { + while (pos < expr.size() && std::isspace(static_cast(expr[pos]))) ++pos; + } + [[nodiscard]] auto eof() const -> bool { return pos >= expr.size(); } + + auto match2(char a, char b) -> bool { + skip_ws(); + if (pos + 1 < expr.size() && expr[pos] == a && expr[pos + 1] == b) { pos += 2; return true; } + return false; + } + auto match(char c) -> bool { + skip_ws(); + if (!eof() && expr[pos] == c) { ++pos; return true; } + return false; + } + + auto run() -> long { return parse_or(); } + auto parse_or() -> long { + auto l = parse_and(); + while (match2('|', '|')) { auto r = parse_and(); l = (l || r) ? 1 : 0; } + return l; + } + auto parse_and() -> long { + auto l = parse_eq(); + while (match2('&', '&')) { auto r = parse_eq(); l = (l && r) ? 1 : 0; } + return l; + } + auto parse_eq() -> long { + auto l = parse_cmp(); + for (;;) { + if (match2('=', '=')) { l = (l == parse_cmp()) ? 1 : 0; } + else if (match2('!', '=')) { l = (l != parse_cmp()) ? 1 : 0; } + else break; + } + return l; + } + auto parse_cmp() -> long { + auto l = parse_add(); + for (;;) { + if (match2('<', '=')) { l = (l <= parse_add()) ? 1 : 0; } + else if (match2('>', '=')) { l = (l >= parse_add()) ? 1 : 0; } + else if (match('<')) { l = (l < parse_add()) ? 1 : 0; } + else if (match('>')) { l = (l > parse_add()) ? 
1 : 0; } + else break; + } + return l; + } + auto parse_add() -> long { + auto l = parse_mul(); + for (;;) { + if (match('+')) l += parse_mul(); + else if (match('-')) l -= parse_mul(); + else break; + } + return l; + } + auto parse_mul() -> long { + auto l = parse_unary(); + for (;;) { + if (match('*')) l *= parse_unary(); + else if (match('/')) { auto r = parse_unary(); if (r == 0) { error = true; return 0; } l /= r; } + else if (match('%')) { auto r = parse_unary(); if (r == 0) { error = true; return 0; } l %= r; } + else break; + } + return l; + } + auto parse_unary() -> long { + if (match('!')) return parse_unary() == 0 ? 1 : 0; + if (match('-')) return -parse_unary(); + if (match('+')) return parse_unary(); + return parse_primary(); + } + auto parse_primary() -> long { + skip_ws(); + if (match('(')) { + auto v = parse_or(); + match(')'); + return v; + } + if (!eof() && std::isdigit(static_cast(expr[pos]))) { + long v = 0; + while (!eof() && std::isdigit(static_cast(expr[pos]))) { + v = v * 10 + (expr[pos] - '0'); + ++pos; + } + return v; + } + std::string name; + if (!eof() && expr[pos] == '$') { + ++pos; + if (!eof() && expr[pos] == '{') { + ++pos; + while (!eof() && expr[pos] != '}') name += expr[pos++]; + if (!eof()) ++pos; + } else { + while (!eof() && (std::isalnum(static_cast(expr[pos])) || expr[pos] == '_')) name += expr[pos++]; + } + } else if (!eof() && (std::isalpha(static_cast(expr[pos])) || expr[pos] == '_')) { + while (!eof() && (std::isalnum(static_cast(expr[pos])) || expr[pos] == '_')) name += expr[pos++]; + } + if (name.empty()) { error = true; return 0; } + auto val = st.get_var(name); + if (val.empty()) return 0; + char* end = nullptr; + long n = std::strtol(val.c_str(), &end, 10); + return (*end == '\0') ? 
n : 0; + } +}; + +auto eval_arith(const std::string& expr, const cfbox::sh::ShellState& state) -> std::string { + ArithEvaluator ae{expr, 0, state}; + auto v = ae.run(); + if (ae.error) { + CFBOX_ERR("sh", "arithmetic expression error"); + return "0"; + } + return std::to_string(v); +} + } // namespace namespace cfbox::sh { @@ -59,6 +190,31 @@ static auto process_dollar(Iter& it, Iter end, const ShellState& state) -> std:: } if (c == '(') { + // $((expr)) arithmetic, distinguished from $(...) command substitution. + auto next_it = it; + ++next_it; + if (next_it != end && *next_it == '(') { + ++it; ++it; // skip both ( + int depth = 0; + std::string expr; + while (it != end) { + char ch = *it; + if (ch == '(') { ++depth; expr += ch; } + else if (ch == ')') { + if (depth == 0) { + ++it; // first ) of )) + if (it != end && *it == ')') ++it; // second ) + break; + } + --depth; + expr += ch; + } else { + expr += ch; + } + ++it; + } + return eval_arith(expr, state); + } // $(...) — basic command substitution support ++it; // skip ( int depth = 1; diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index c795e7d..a0a793a 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -93,6 +93,28 @@ out=$($SH -c 'echo $(echo nested)') assert_output "nested" "$out" ((++pass)) +# ── Arithmetic $((expr)) ───────────────────────────────────────── +out=$($SH -c 'echo $((1 + 2 * 3))') +assert_output "7" "$out" +((++pass)) + +out=$($SH -c 'echo $((10 - 4))') +assert_output "6" "$out" +((++pass)) + +out=$($SH -c 'i=5; echo $((i + 1))') +assert_output "6" "$out" +((++pass)) + +out=$($SH -c 'i=0; while [ $i -lt 3 ]; do echo $i; i=$((i+1)); done') +expected=$'0\n1\n2' +assert_output "$expected" "$out" +((++pass)) + +out=$($SH -c 'echo $(( (2 + 3) * 4 ))') +assert_output "20" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From 
181eaa9de3423d850160cd9cca1acb48a6cb4893 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 13:37:10 +0800 Subject: [PATCH 12/21] feat(sh): case statement with glob pattern matching - case WORD in PATTERN(|PATTERN)) body;; ... esac via CaseClause AST node - lexer emits DSemi (;;) so case bodies terminate correctly - patterns matched with fnmatch; * ? [ ] are pattern syntax, not filename globs - add expand_noglob: expand param/arith/command without field-split/glob, for case words/patterns (fixes * matching cwd files instead of any string) - integration +4 case cases: | alternation, * default, f* prefix, loop dispatch --- src/applets/sh/sh.hpp | 20 ++++++++++-- src/applets/sh/sh_executor.cpp | 15 +++++++++ src/applets/sh/sh_expand.cpp | 25 ++++++++++++++ src/applets/sh/sh_lexer.cpp | 8 ++++- src/applets/sh/sh_parser.cpp | 59 ++++++++++++++++++++++++++++++++++ tests/integration/test_sh.sh | 18 +++++++++++ 6 files changed, 142 insertions(+), 3 deletions(-) diff --git a/src/applets/sh/sh.hpp b/src/applets/sh/sh.hpp index 40f3160..0f24488 100644 --- a/src/applets/sh/sh.hpp +++ b/src/applets/sh/sh.hpp @@ -16,7 +16,7 @@ namespace cfbox::sh { // ── Token ──────────────────────────────────────────────────────── enum class TokType { Word, Newline, Eof, - Pipe, Semi, And, Or, // | ; && || + Pipe, Semi, DSemi, And, Or, // | ; ;; && || LParen, RParen, LBrace, RBrace, Less, Great, DGreate, // < > >> LessAnd, GreatAnd, // <& >& @@ -50,6 +50,7 @@ struct WhileClause; struct ForClause; struct Subshell; struct BraceGroup; +struct CaseClause; using Command = std::variant, @@ -57,7 +58,8 @@ using Command = std::variant, std::unique_ptr, std::unique_ptr, - std::unique_ptr>; + std::unique_ptr, + std::unique_ptr>; struct Pipeline { std::vector commands; @@ -96,6 +98,16 @@ struct BraceGroup { std::unique_ptr body; }; +struct CaseBranch { + std::vector patterns; // pat1 | pat2 (glob, pre-expansion) + std::unique_ptr body; // nullptr for an empty 
branch body +}; + +struct CaseClause { + std::string word; // value to match (pre-expansion) + std::vector branches; +}; + // ── Shell State ────────────────────────────────────────────────── class ShellState { public: @@ -187,6 +199,7 @@ class Parser { auto parse_for() -> std::unique_ptr; auto parse_subshell() -> std::unique_ptr; auto parse_brace_group() -> std::unique_ptr; + auto parse_case() -> std::unique_ptr; Lexer& lexer_; Token current_; @@ -206,5 +219,8 @@ auto run_builtin(const std::string& name, std::vector& args, ShellS // ── Word Expansion ─────────────────────────────────────────────── auto expand_word(const std::string& word, const ShellState& state) -> std::vector; auto expand_words(const std::vector& words, const ShellState& state) -> std::vector; +// Expand param/arith/command/quote but skip field splitting and globbing — +// for case words/patterns where * ? [ ] are pattern syntax, not filename globs. +auto expand_noglob(const std::string& word, const ShellState& state) -> std::string; } // namespace cfbox::sh diff --git a/src/applets/sh/sh_executor.cpp b/src/applets/sh/sh_executor.cpp index 39d2dc9..12b418f 100644 --- a/src/applets/sh/sh_executor.cpp +++ b/src/applets/sh/sh_executor.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -310,6 +311,20 @@ auto execute_command(Command& cmd, ShellState& state) -> int { else if constexpr (std::is_same_v>) { return node->body ? execute(*node->body, state) : 0; } + else if constexpr (std::is_same_v>) { + // Expand the case word, then match against each branch's glob patterns. + // Patterns use expand_noglob so * ? [ ] stay as pattern syntax. + std::string value = expand_noglob(node->word, state); + for (auto& br : node->branches) { + for (auto& pat : br.patterns) { + std::string pat_str = expand_noglob(pat, state); + if (::fnmatch(pat_str.c_str(), value.c_str(), 0) == 0) { + return br.body ? 
execute(*br.body, state) : 0; + } + } + } + return 0; // no pattern matched + } else { return 1; } diff --git a/src/applets/sh/sh_expand.cpp b/src/applets/sh/sh_expand.cpp index bffd814..e297f30 100644 --- a/src/applets/sh/sh_expand.cpp +++ b/src/applets/sh/sh_expand.cpp @@ -384,4 +384,29 @@ auto expand_words(const std::vector& words, const ShellState& state return result; } +// Expand param/arith/command substitution and quotes, but skip field splitting +// and filename globbing. Used for `case` words and patterns, where the glob +// metacharacters (* ? [ ]) are pattern syntax rather than filename globs. +auto expand_noglob(const std::string& word, const ShellState& state) -> std::string { + std::string expanded; + bool in_double_quotes = false; + auto it = word.cbegin(); + auto end = word.cend(); + while (it != end) { + char c = *it; + if (c == '"') { in_double_quotes = !in_double_quotes; ++it; continue; } + if (c == '\\' && !in_double_quotes) { ++it; if (it != end) { expanded += *it++; } continue; } + if (c == '$') { expanded += process_dollar(it, end, state); continue; } + if (c == '~' && !in_double_quotes && expanded.empty()) { + ++it; + const char* home = std::getenv("HOME"); + expanded += home ? 
home : "/root"; + continue; + } + expanded += c; + ++it; + } + return expanded; +} + } // namespace cfbox::sh diff --git a/src/applets/sh/sh_lexer.cpp b/src/applets/sh/sh_lexer.cpp index f39e0c6..4715d7c 100644 --- a/src/applets/sh/sh_lexer.cpp +++ b/src/applets/sh/sh_lexer.cpp @@ -97,7 +97,13 @@ auto Lexer::read_operator() -> std::optional { tok.type = TokType::Semi; pos_ += 1; } return tok; - case ';': tok.type = TokType::Semi; pos_ += 1; return tok; + case ';': + if (pos_ + 1 < input_.size() && input_[pos_ + 1] == ';') { + tok.type = TokType::DSemi; pos_ += 2; + } else { + tok.type = TokType::Semi; pos_ += 1; + } + return tok; case '(': tok.type = TokType::LParen; pos_ += 1; return tok; case ')': tok.type = TokType::RParen; pos_ += 1; return tok; case '{': tok.type = TokType::LBrace; pos_ += 1; return tok; diff --git a/src/applets/sh/sh_parser.cpp b/src/applets/sh/sh_parser.cpp index 565ee6a..6af1fb6 100644 --- a/src/applets/sh/sh_parser.cpp +++ b/src/applets/sh/sh_parser.cpp @@ -146,6 +146,7 @@ auto Parser::parse_command() -> Command { if (current_.value == "while") return parse_while(); if (current_.value == "until") return parse_while(); // reuse, set is_until if (current_.value == "for") return parse_for(); + if (current_.value == "case") return parse_case(); } return parse_simple_command(); @@ -391,4 +392,62 @@ auto Parser::parse_brace_group() -> std::unique_ptr { return result; } +auto Parser::parse_case() -> std::unique_ptr { + auto result = std::make_unique(); + advance(); // consume 'case' + + if (current_.type != TokType::Word) { + CFBOX_ERR("sh", "syntax error: expected word after 'case'"); + return result; + } + result->word = current_.value; + advance(); + + while (current_.type == TokType::Newline) advance(); + if (!expect_keyword("in")) { + CFBOX_ERR("sh", "syntax error: expected 'in' after case word"); + return result; + } + while (current_.type == TokType::Newline) advance(); + + while (!(current_.type == TokType::Word && current_.value == 
"esac")) { + if (current_.type == TokType::Eof) { + CFBOX_ERR("sh", "syntax error: unexpected EOF in case"); + break; + } + CaseBranch br; + if (current_.type == TokType::Word) { + br.patterns.push_back(current_.value); + advance(); + } + while (current_.type == TokType::Pipe) { + advance(); + if (current_.type == TokType::Word) { + br.patterns.push_back(current_.value); + advance(); + } + } + if (current_.type != TokType::RParen) { + CFBOX_ERR("sh", "syntax error: expected ')' in case pattern"); + break; + } + advance(); // consume ')' + + br.body = parse_compound_list(); // stops at DSemi / esac + result->branches.push_back(std::move(br)); + + if (current_.type == TokType::DSemi) { + advance(); + while (current_.type == TokType::Newline) advance(); + } else { + break; // only esac may follow without ;; + } + } + + if (!expect_keyword("esac")) { + CFBOX_ERR("sh", "syntax error: expected 'esac'"); + } + return result; +} + } // namespace cfbox::sh diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index a0a793a..45c6e23 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -115,6 +115,24 @@ out=$($SH -c 'echo $(( (2 + 3) * 4 ))') assert_output "20" "$out" ((++pass)) +# ── case statement ─────────────────────────────────────────────── +out=$($SH -c 'case start in start) echo go;; stop) echo halt;; esac') +assert_output "go" "$out" +((++pass)) + +out=$($SH -c 'case x in a|b) echo ab;; *) echo other;; esac') +assert_output "other" "$out" +((++pass)) + +out=$($SH -c 'case foo in f*) echo begins_f;; *) echo no;; esac') +assert_output "begins_f" "$out" +((++pass)) + +out=$($SH -c 'for w in 1 2 3; do case $w in 1) echo one;; 2) echo two;; *) echo many;; esac; done') +expected=$'one\ntwo\nmany' +assert_output "$expected" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From bfcc71ea1fb97fc85c04b01cf647275277ae4302 Mon Sep 17 
00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 13:46:17 +0800 Subject: [PATCH 13/21] feat(sh): user-defined functions with return and local scopes - FuncDef AST node; name() { body; } parsed and registered in ShellState - function calls run the body in-process with new positional params ($1..) - return N sets a pending flag consumed at the call site, propagating through AndOr / while / for execution - local declares function-scoped vars via a scope stack; get_var/set_var are scope-aware so locals do not leak to globals - integration +4: definition+call, params, return status, local isolation --- src/applets/sh/sh.hpp | 25 ++++++++++++++++++++- src/applets/sh/sh_builtins.cpp | 25 +++++++++++++++++++++ src/applets/sh/sh_executor.cpp | 26 ++++++++++++++++++++++ src/applets/sh/sh_parser.cpp | 35 +++++++++++++++++++++++++++++ src/applets/sh/sh_vars.cpp | 40 ++++++++++++++++++++++++++++++++++ tests/integration/test_sh.sh | 20 +++++++++++++++++ 6 files changed, 170 insertions(+), 1 deletion(-) diff --git a/src/applets/sh/sh.hpp b/src/applets/sh/sh.hpp index 0f24488..bab534d 100644 --- a/src/applets/sh/sh.hpp +++ b/src/applets/sh/sh.hpp @@ -51,6 +51,7 @@ struct ForClause; struct Subshell; struct BraceGroup; struct CaseClause; +struct FuncDef; using Command = std::variant, @@ -59,7 +60,8 @@ using Command = std::variant, std::unique_ptr, std::unique_ptr, - std::unique_ptr>; + std::unique_ptr, + std::unique_ptr>; struct Pipeline { std::vector commands; @@ -108,6 +110,11 @@ struct CaseClause { std::vector branches; }; +struct FuncDef { + std::string name; + std::unique_ptr body; +}; + // ── Shell State ────────────────────────────────────────────────── class ShellState { public: @@ -133,12 +140,25 @@ class ShellState { auto script_name() const -> const std::string& { return script_name_; } auto set_script_name(std::string name) -> void { script_name_ = std::move(name); } + // Functions + auto define_function(const std::string& name, 
std::unique_ptr body) -> void; + [[nodiscard]] auto is_function(const std::string& name) const -> bool; + auto get_function(const std::string& name) -> AndOr*; + + // Local variable scopes (function-local variables) + auto push_scope() -> void; + auto pop_scope() -> void; + auto set_local(const std::string& name, const std::string& value) -> void; + [[nodiscard]] auto in_function() const -> bool { return !local_scopes_.empty(); } + // Control flow flags bool should_exit = false; int exit_status = 0; bool break_loop = false; int break_count = 0; bool continue_loop = false; + bool return_pending = false; + int return_status = 0; private: std::unordered_map vars_; @@ -146,6 +166,8 @@ class ShellState { std::vector positional_; int last_status_ = 0; std::string script_name_; + std::unordered_map> functions_; + std::vector> local_scopes_; }; // ── Lexer ──────────────────────────────────────────────────────── @@ -200,6 +222,7 @@ class Parser { auto parse_subshell() -> std::unique_ptr; auto parse_brace_group() -> std::unique_ptr; auto parse_case() -> std::unique_ptr; + auto parse_func() -> std::unique_ptr; Lexer& lexer_; Token current_; diff --git a/src/applets/sh/sh_builtins.cpp b/src/applets/sh/sh_builtins.cpp index c18b1da..07c3672 100644 --- a/src/applets/sh/sh_builtins.cpp +++ b/src/applets/sh/sh_builtins.cpp @@ -214,6 +214,29 @@ static int builtin_source(std::vector& args, ShellState& state) { return 0; } +static int builtin_return(std::vector& args, ShellState& state) { + int code = state.last_status(); + if (args.size() > 1) code = std::atoi(args[1].c_str()); + state.return_pending = true; + state.return_status = code; + return code; +} + +static int builtin_local(std::vector& args, ShellState& state) { + if (!state.in_function()) { + CFBOX_ERR("sh", "local: can only be used in a function"); + return 1; + } + for (std::size_t i = 1; i < args.size(); ++i) { + auto& arg = args[i]; + auto eq = arg.find('='); + std::string name = (eq == std::string::npos) ? 
arg : arg.substr(0, eq); + std::string val = (eq == std::string::npos) ? state.get_var(name) : arg.substr(eq + 1); + state.set_local(name, val); + } + return 0; +} + auto get_builtins() -> const std::unordered_map& { static const std::unordered_map builtins = { {"echo", builtin_echo}, @@ -231,6 +254,8 @@ auto get_builtins() -> const std::unordered_map& { {"eval", builtin_eval}, {"source", builtin_source}, {".", builtin_source}, + {"return", builtin_return}, + {"local", builtin_local}, }; return builtins; } diff --git a/src/applets/sh/sh_executor.cpp b/src/applets/sh/sh_executor.cpp index 12b418f..fff48f7 100644 --- a/src/applets/sh/sh_executor.cpp +++ b/src/applets/sh/sh_executor.cpp @@ -109,6 +109,25 @@ static auto execute_simple(SimpleCommand& cmd, ShellState& state) -> int { return rc; } + // User-defined function: run its body in-process with new positional + // parameters and a fresh local-variable scope. + if (state.is_function(expanded[0])) { + auto* body = state.get_function(expanded[0]); + auto saved_positional = state.positional_params(); + std::vector func_args(expanded.begin() + 1, expanded.end()); + state.set_positional(std::move(func_args)); + state.push_scope(); + int rc = body ? 
execute(*body, state) : 0; + if (state.return_pending) { + rc = state.return_status; + state.return_pending = false; + } + state.pop_scope(); + state.set_positional(std::move(saved_positional)); + state.set_last_status(rc); + return rc; + } + // External command: fork and exec pid_t pid = ::fork(); if (pid < 0) { @@ -273,6 +292,7 @@ auto execute_command(Command& cmd, ShellState& state) -> int { } if (state.should_exit) break; + if (state.return_pending) break; if (state.break_loop) { state.break_loop = false; break; } if (state.continue_loop) { state.continue_loop = false; continue; } } @@ -291,6 +311,7 @@ auto execute_command(Command& cmd, ShellState& state) -> int { state.set_last_status(rc); } if (state.should_exit) break; + if (state.return_pending) break; if (state.break_loop) { state.break_loop = false; break; } if (state.continue_loop) { state.continue_loop = false; continue; } } @@ -325,6 +346,10 @@ auto execute_command(Command& cmd, ShellState& state) -> int { } return 0; // no pattern matched } + else if constexpr (std::is_same_v>) { + state.define_function(node->name, std::move(node->body)); + return 0; + } else { return 1; } @@ -345,6 +370,7 @@ auto execute(AndOr& node, ShellState& state) -> int { state.set_last_status(last_rc); if (state.should_exit) break; + if (state.return_pending) break; } return last_rc; diff --git a/src/applets/sh/sh_parser.cpp b/src/applets/sh/sh_parser.cpp index 6af1fb6..bd235e6 100644 --- a/src/applets/sh/sh_parser.cpp +++ b/src/applets/sh/sh_parser.cpp @@ -149,6 +149,11 @@ auto Parser::parse_command() -> Command { if (current_.value == "case") return parse_case(); } + // Function definition: NAME ( ) { body } + if (current_.type == TokType::Word && lexer_.peek_token().type == TokType::LParen) { + return parse_func(); + } + return parse_simple_command(); } @@ -450,4 +455,34 @@ auto Parser::parse_case() -> std::unique_ptr { return result; } +auto Parser::parse_func() -> std::unique_ptr { + auto result = std::make_unique(); + 
result->name = current_.value; + advance(); // function name + + if (!expect(TokType::LParen)) { + CFBOX_ERR("sh", "syntax error: expected '(' in function definition"); + return result; + } + if (!expect(TokType::RParen)) { + CFBOX_ERR("sh", "syntax error: expected ')' in function definition"); + return result; + } + while (current_.type == TokType::Newline) advance(); + + if (current_.type != TokType::LBrace) { + CFBOX_ERR("sh", "syntax error: expected '{' for function body"); + return result; + } + advance(); // { + if (current_.type == TokType::Newline) advance(); + result->body = parse_compound_list(); + if (current_.type != TokType::RBrace) { + CFBOX_ERR("sh", "syntax error: expected '}' to close function body"); + } else { + advance(); + } + return result; +} + } // namespace cfbox::sh diff --git a/src/applets/sh/sh_vars.cpp b/src/applets/sh/sh_vars.cpp index b5a59ab..3d33931 100644 --- a/src/applets/sh/sh_vars.cpp +++ b/src/applets/sh/sh_vars.cpp @@ -34,6 +34,11 @@ auto ShellState::get_var(std::string_view name) const -> std::string { return ""; } + // Local scopes (innermost first) take precedence over globals. + for (auto scope_it = local_scopes_.rbegin(); scope_it != local_scopes_.rend(); ++scope_it) { + auto f = scope_it->find(std::string{name}); + if (f != scope_it->end()) return f->second; + } auto it = vars_.find(std::string{name}); if (it != vars_.end()) return it->second; @@ -43,6 +48,12 @@ auto ShellState::get_var(std::string_view name) const -> std::string { } auto ShellState::set_var(const std::string& name, const std::string& value) -> void { + // If the name is an active local, update it in its scope. 
+ if (!local_scopes_.empty()) { + auto& top = local_scopes_.back(); + auto it = top.find(name); + if (it != top.end()) { it->second = value; return; } + } vars_[name] = value; if (exported_.count(name)) { ::setenv(name.c_str(), value.c_str(), 1); @@ -84,4 +95,33 @@ auto ShellState::shell_pid() const -> int { return static_cast(::getpid()); } +auto ShellState::define_function(const std::string& name, std::unique_ptr body) -> void { + functions_[name] = std::move(body); +} + +auto ShellState::is_function(const std::string& name) const -> bool { + return functions_.count(name) > 0; +} + +auto ShellState::get_function(const std::string& name) -> AndOr* { + auto it = functions_.find(name); + return it != functions_.end() ? it->second.get() : nullptr; +} + +auto ShellState::push_scope() -> void { + local_scopes_.emplace_back(); +} + +auto ShellState::pop_scope() -> void { + if (!local_scopes_.empty()) local_scopes_.pop_back(); +} + +auto ShellState::set_local(const std::string& name, const std::string& value) -> void { + if (local_scopes_.empty()) { + vars_[name] = value; // outside a function: behave like a normal var + } else { + local_scopes_.back()[name] = value; + } +} + } // namespace cfbox::sh diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index 45c6e23..ba764f0 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -133,6 +133,26 @@ expected=$'one\ntwo\nmany' assert_output "$expected" "$out" ((++pass)) +# ── Functions, return, local ───────────────────────────────────── +out=$($SH -c 'greet() { echo hi; }; greet') +assert_output "hi" "$out" +((++pass)) + +out=$($SH -c 'add() { echo $(($1 + $2)); }; add 3 4') +assert_output "7" "$out" +((++pass)) + +set +e +$SH -c 'f() { return 42; }; f' +rc=$? 
+set -e +[[ $rc -eq 42 ]] && ((++pass)) || { echo "FAIL [return status]: $rc"; ((++fail)); } + +out=$($SH -c 'g() { local x=L; echo $x; }; x=G; g; echo $x') +expected=$'L\nG' +assert_output "$expected" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From 876eb1a3109be0cb3f3100220f263013a41d34ac Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 13:48:33 +0800 Subject: [PATCH 14/21] docs(plan): record sh deepen P1 (arithmetic/case/functions) progress --- document/ai/PLAN.md | 4 +-- document/notes/2026-06-28-sh-deepen-p1.md | 40 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 document/notes/2026-06-28-sh-deepen-p1.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index c17ce53..72ee664 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批5c `sh` 全收(case/`$((expr))`/函数/here-doc + read增强/break N/高级`${}`/trap)。 +> 焦点 → Phase 2 批5c `sh` 深化续:here-doc + read增强/break N/高级`${}`/trap(算术/case/函数已完成)。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -25,7 +25,7 @@ | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ✅ | 6dfe329 | 424/0 | | 批5a | `grep -A/-B/-C` 上下文(ring 向前 + after_pending 向后 + 组间 `--`) | ✅ | b6920c3 | 431/0 | | 批5b | `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降) | ✅ | 3e29feb | 436/0 | -| 批5c | `sh` 
深化(case/`$((expr))`/函数/here-doc + read增强/break N/高级`${}`/trap) | 🔄 NEXT | — | — | +| 批5c | `sh` 深化 — ✅算术 `$((expr))`/case/函数+return+local(40d48dc·181eaa9·bfcc71e);⏳here-doc/read增强/break N/高级`${}`/trap | 🔄 | — | 436/41 | > 各批细节(触及文件、Result 签名草案、完成门、gotcha)由 `/next <批>` 现场产出脚手架,确认后写入本表 commit/测试列。 diff --git a/document/notes/2026-06-28-sh-deepen-p1.md b/document/notes/2026-06-28-sh-deepen-p1.md new file mode 100644 index 0000000..52e4c27 --- /dev/null +++ b/document/notes/2026-06-28-sh-deepen-p1.md @@ -0,0 +1,40 @@ +# 2026-06-28 — sh 深化 P1(算术/case/函数)(Phase 2 批5c 第 1-3 子项) + +## 背景 +sh 是最大 applet(8 文件 ~1927 行,零 GTest)。P1 高频深化三项:`$((expr))` 算术、`case` 语句、函数+return+local。打通运维脚本最关键闭环(init 脚本大量用这些)。 + +## 子项与设计决策 + +### 1. 算术 `$((expr))`(40d48dc) +- **递归下降求值器** `ArithEvaluator`:`+ - * / %`、比较、`&& || !`、括号、整数字面量、变量(裸名或 `$VAR`;空/非数字 → 0)。除零 → error → "0"。 +- **赋值 RHS 展开修复**(既存 bug):`X=$((i+1))` 之前 set_var 存 raw 字符串,现 RHS 经 `expand_word` 展开后 join 回单值(赋值不 field-split)。 +- process_dollar 识别 `$((` 与 `$(`(命令替换)区分。 + +### 2. case 语句(181eaa9) +- **CaseClause AST**(CaseBranch: patterns + body)+ Command variant 增项。 +- **lexer 加 DSemi token**(`;;`)——既存 lexer 把 `;;` 产两个 Semi,parse_compound_list 无法区分分支结束。DSemi 让 case body 正确终止。 +- **模式匹配用 fnmatch**;新增 `expand_noglob`(展开 param/arith/command 但**不** field-split/glob)给 case word/pattern——否则 `*` 会被 filename glob 展开成当前目录文件,破坏模式语义。 +- parser `parse_case`:`case WORD in PAT(|PAT)) body;; ... esac`。 + +### 3. 
函数 + return + local(bfcc71e) +- **FuncDef AST** + Command variant;`name() { body; }` 经 parse_func 注册。 +- **ShellState 扩展**:函数表 `functions_`、local 作用域栈 `local_scopes_`、`return_pending`/`return_status`。 +- **scope-aware get_var/set_var**:local 栈顶优先;set_var 若 name 是 active local 则写 scope,否则写全局 vars_。 +- **函数调用**(execute_simple,builtin 后 external 前):保存 positional、设新 positional(`$1..`)、push_scope、execute body、消费 return_pending、pop_scope、恢复 positional。 +- **return 传播**:execute(AndOr) + while/for body 检测 `return_pending` 中断,回到调用点消费。 +- builtin `return N`(设 pending)+ `local NAME=VAL`(仅函数内,set_local)。 + +## 验证 +- 集成 test_sh.sh:28 → 41(+5 算术 +4 case +4 函数),全绿。 +- ctest 436/436(sh 仍零 GTest,待后续建 test_sh.cpp)。 +- size-opt **434 KB**(基线 418,+16 KB:ArithEvaluator + CaseClause/FuncDef AST + ShellState 函数表/作用域 + fnmatch)。 + +## 陷阱 / 留给后续 +- **here-doc**(`<<`/`<<-`)未做:lexer 需跨行收集 body 状态机(pending heredoc 队列 + 换行后注入 body token),复杂度高,留下批。 +- **赋值 RHS 用 expand_word + join** 是近似(`VAR=*.txt` 会 glob,POSIX 赋值不 glob);罕见,接受。 +- **break N / continue N** 未做:当前 break_loop 是 bool(单层),多层 break N 待 ShellState 改 break_depth。 +- **高级 `${}`**(`${#VAR}`/`${VAR%pat}`/`${VAR#pat}`/`${VAR:=def}`/`${VAR:?err}`):expand_param 仅 :- / :+,其余留下批。 +- **read 增强**(-p/-r/-s/-n/-t)、**trap**:留下批。 +- **sh 零 GTest**:本批靠集成验证;test_sh.cpp(capture_stdout + sh_main -c)待建。 +- 函数体执行 move body 到 ShellState(FuncDef 二次执行会定义空函数;罕见,接受)。 +- commits: `40d48dc`(算术)/ `181eaa9`(case)/ `bfcc71e`(函数) From e07de184f4726453f30f0848f9458e8e2040e3bd Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 14:05:12 +0800 Subject: [PATCH 15/21] feat(sh): here-doc redirections (<< and <<-) with body expansion - lexer reads < >> + DLess, DLessDash, // << <<- LessAnd, GreatAnd, // <& >& // Keywords stored as Word with keyword flag }; @@ -31,7 +32,7 @@ struct Token { // ── Redirection ────────────────────────────────────────────────── struct Redir { - enum Type { Read, Write, Append, DupIn, DupOut }; + enum Type { 
Read, Write, Append, DupIn, DupOut, HereDoc }; int fd = -1; // target fd (default inferred: 0 for Read, 1 for Write/Append) Type type = Read; std::string target; // filename or fd number for dup diff --git a/src/applets/sh/sh_executor.cpp b/src/applets/sh/sh_executor.cpp index fff48f7..b1a7b11 100644 --- a/src/applets/sh/sh_executor.cpp +++ b/src/applets/sh/sh_executor.cpp @@ -1,6 +1,7 @@ #include "sh.hpp" #include +#include #include #include #include @@ -12,12 +13,12 @@ namespace cfbox::sh { // Apply redirections, return saved fds for restoration -static auto apply_redirections(const std::vector& redirs) -> std::vector> { +static auto apply_redirections(const std::vector& redirs, const ShellState& state) -> std::vector> { std::vector> saved; for (const auto& r : redirs) { int target_fd = r.fd; - if (target_fd < 0) target_fd = (r.type == Redir::Read || r.type == Redir::DupIn) ? 0 : 1; + if (target_fd < 0) target_fd = (r.type == Redir::Read || r.type == Redir::DupIn || r.type == Redir::HereDoc) ? 0 : 1; // Save original fd int saved_fd = ::dup(target_fd); @@ -62,6 +63,21 @@ static auto apply_redirections(const std::vector& redirs) -> std::vector< if (src >= 0) ::dup2(src, target_fd); break; } + case Redir::HereDoc: { + // Expand param/arith/command in the body, then feed it via temp file. 
+ std::string body = expand_noglob(r.target, state); + char tmpl[] = "/tmp/cfbox_hd_XXXXXX"; + int tfd = ::mkstemp(tmpl); + if (tfd >= 0) { + ssize_t wr = ::write(tfd, body.c_str(), body.size()); + (void)wr; + ::lseek(tfd, 0, SEEK_SET); + ::dup2(tfd, target_fd); + ::close(tfd); + ::unlink(tmpl); + } + break; + } } } @@ -102,7 +118,7 @@ static auto execute_simple(SimpleCommand& cmd, ShellState& state) -> int { // Check builtin if (is_builtin(expanded[0])) { - auto saved = apply_redirections(cmd.redirs); + auto saved = apply_redirections(cmd.redirs, state); int rc = run_builtin(expanded[0], expanded, state); std::fflush(nullptr); restore_redirections(saved); @@ -137,7 +153,7 @@ static auto execute_simple(SimpleCommand& cmd, ShellState& state) -> int { if (pid == 0) { // Child process - apply_redirections(cmd.redirs); + apply_redirections(cmd.redirs, state); // Build argv std::vector argv; @@ -205,7 +221,7 @@ static auto execute_pipeline(Pipeline& node, ShellState& state) -> int { auto& cmd = node.commands[static_cast(i)]; if (std::holds_alternative(cmd)) { auto& sc = std::get(cmd); - apply_redirections(sc.redirs); + apply_redirections(sc.redirs, state); auto expanded = expand_words(sc.words, state); if (expanded.empty()) ::_Exit(0); diff --git a/src/applets/sh/sh_lexer.cpp b/src/applets/sh/sh_lexer.cpp index 4715d7c..cefaf1f 100644 --- a/src/applets/sh/sh_lexer.cpp +++ b/src/applets/sh/sh_lexer.cpp @@ -4,6 +4,11 @@ namespace cfbox::sh { +static auto is_op_char(char c) -> bool { + return c == '|' || c == ';' || c == '&' || c == '(' || c == ')' || + c == '<' || c == '>' || c == '{' || c == '}' || c == '#'; +} + Lexer::Lexer(std::string_view input) : input_{input} {} auto Lexer::advance() -> char { @@ -112,8 +117,35 @@ auto Lexer::read_operator() -> std::optional { if (pos_ + 1 < input_.size() && input_[pos_ + 1] == '&') { tok.type = TokType::LessAnd; pos_ += 2; } else if (pos_ + 1 < input_.size() && input_[pos_ + 1] == '<') { - // Here-document << — treat the delimiter 
as a word token after - tok.type = TokType::Less; pos_ += 1; // simplified: no here-doc support yet + // Here-document < SimpleCommand { } auto Parser::parse_redirect() -> std::optional { + // Here-document: the body is already carried in the token value. + if (current_.type == TokType::DLess || current_.type == TokType::DLessDash) { + Redir r; + r.fd = 0; + r.type = Redir::HereDoc; + r.target = std::move(current_.value); + advance(); + return r; + } + // Check for redirect: [n]<, [n]>, [n]>>, [n]<&, [n]>& int fd = -1; diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index ba764f0..08cf428 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -153,6 +153,27 @@ expected=$'L\nG' assert_output "$expected" "$out" ((++pass)) +# ── Here-doc ──────────────────────────────────────────────────── +out=$($SH -c 'cat < Date: Sun, 28 Jun 2026 14:09:43 +0800 Subject: [PATCH 16/21] feat(sh): advanced parameter expansion ${...} - ${#VAR} length - ${VAR#pat} / ${VAR##pat} strip shortest/longest matching prefix - ${VAR%pat} / ${VAR%%pat} strip shortest/longest matching suffix - ${VAR:-def} default, ${VAR:+alt} alternative - glob prefix/suffix matching via fnmatch over increasing lengths - integration +5 incl. the ${P##*/} basename idiom --- src/applets/sh/sh_expand.cpp | 82 ++++++++++++++++++++++++++++-------- tests/integration/test_sh.sh | 21 +++++++++ 2 files changed, 86 insertions(+), 17 deletions(-) diff --git a/src/applets/sh/sh_expand.cpp b/src/applets/sh/sh_expand.cpp index e297f30..9d49ad5 100644 --- a/src/applets/sh/sh_expand.cpp +++ b/src/applets/sh/sh_expand.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -145,28 +146,75 @@ auto eval_arith(const std::string& expr, const cfbox::sh::ShellState& state) -> return std::to_string(v); } +// Length of the shortest/longest prefix of val that matches glob pat (0 = none). 
+auto glob_prefix_len(const std::string& pat, const std::string& val, bool longest) -> std::size_t { + if (longest) { + for (std::size_t k = val.size(); k > 0; --k) + if (::fnmatch(pat.c_str(), val.substr(0, k).c_str(), 0) == 0) return k; + } else { + for (std::size_t k = 1; k <= val.size(); ++k) + if (::fnmatch(pat.c_str(), val.substr(0, k).c_str(), 0) == 0) return k; + } + return 0; +} + +// Length of the shortest/longest suffix of val that matches glob pat (0 = none). +auto glob_suffix_len(const std::string& pat, const std::string& val, bool longest) -> std::size_t { + if (longest) { + for (std::size_t k = val.size(); k > 0; --k) + if (::fnmatch(pat.c_str(), val.substr(val.size() - k).c_str(), 0) == 0) return k; + } else { + for (std::size_t k = 1; k <= val.size(); ++k) + if (::fnmatch(pat.c_str(), val.substr(val.size() - k).c_str(), 0) == 0) return k; + } + return 0; +} + } // namespace namespace cfbox::sh { static auto expand_param(const std::string& name, const ShellState& state) -> std::string { - // Handle ${VAR:-default} - auto colon_pos = name.find(":-"); - if (colon_pos != std::string::npos) { - auto var_name = name.substr(0, colon_pos); - auto default_val = name.substr(colon_pos + 2); - auto val = state.get_var(var_name); - return val.empty() ? default_val : val; - } - // Handle ${VAR:+alt} - colon_pos = name.find(":+"); - if (colon_pos != std::string::npos) { - auto var_name = name.substr(0, colon_pos); - auto alt_val = name.substr(colon_pos + 2); - auto val = state.get_var(var_name); - return val.empty() ? "" : alt_val; - } - return state.get_var(name); + // ${#NAME} -> length of NAME's value. + if (name.size() >= 2 && name[0] == '#') { + return std::to_string(state.get_var(name.substr(1)).size()); + } + + // Locate the operator following the variable name ([A-Za-z0-9_]+). 
+ std::size_t i = 0; + while (i < name.size() && + (std::isalnum(static_cast(name[i])) || name[i] == '_')) { + ++i; + } + if (i == 0 || i >= name.size()) { + return state.get_var(name); // plain ${VAR}, no operator + } + + const bool colon = (name[i] == ':'); + const std::size_t op_idx = colon ? i + 1 : i; + if (op_idx >= name.size()) return state.get_var(name); + + const char opc = name[op_idx]; + const bool dbl = (op_idx + 1 < name.size() && name[op_idx + 1] == opc && + (opc == '#' || opc == '%')); + const std::string var_name = name.substr(0, i); + const std::string arg = name.substr(op_idx + (dbl ? 2 : 1)); + const std::string val = state.get_var(var_name); + + switch (opc) { + case '#': { // strip matching prefix (# shortest, ## longest) + return val.substr(glob_prefix_len(arg, val, dbl)); + } + case '%': { // strip matching suffix (% shortest, %% longest) + return val.substr(0, val.size() - glob_suffix_len(arg, val, dbl)); + } + case '-': // default: empty/unset -> arg + return val.empty() ? arg : val; + case '+': // alternative: non-empty -> arg + return val.empty() ? 
std::string{} : arg; + default: + return val; + } } // Process $ expansions in a word fragment, returns the expanded string diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index 08cf428..9a17441 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -174,6 +174,27 @@ EOF') assert_output "7" "$out" ((++pass)) +# ── Advanced ${} parameter expansion ───────────────────────────── +out=$($SH -c 'X=hello; echo ${#X}') +assert_output "5" "$out" +((++pass)) + +out=$($SH -c 'F=a.txt; echo ${F%.txt}') +assert_output "a" "$out" +((++pass)) + +out=$($SH -c 'F=pre_data; echo ${F#pre_}') +assert_output "data" "$out" +((++pass)) + +out=$($SH -c 'P=/a/b/c.txt; echo ${P##*/}') +assert_output "c.txt" "$out" +((++pass)) + +out=$($SH -c 'echo ${MISS:-fallback}') +assert_output "fallback" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From 4e5814fcf4a07fc1cb64e91a41b93f4ea51b7e03 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 14:24:41 +0800 Subject: [PATCH 17/21] feat(sh): break/continue (incl. break N) and fix AndOr merge across ';' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - break N / continue: ShellState break_depth counts enclosing loops; the AndOr executor stops on pending break/continue so loop bodies abort early - add break/continue builtins (were missing entirely — loop flags were never set, so break/continue silently did nothing) - fix parse_compound_list corrupting a trailing && / || when merging across ';': it rewrote the last entry's operator to Semi, so 'false && echo M; echo X' wrongly ran echo M. Entries are now concatenated unchanged. 
- integration +3: break, continue, break 2 across nested loops --- src/applets/sh/sh.hpp | 3 +-- src/applets/sh/sh_builtins.cpp | 14 ++++++++++++++ src/applets/sh/sh_executor.cpp | 6 ++++-- src/applets/sh/sh_parser.cpp | 15 +++++---------- tests/integration/test_sh.sh | 14 ++++++++++++++ 5 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/applets/sh/sh.hpp b/src/applets/sh/sh.hpp index fc20b18..f9fdde1 100644 --- a/src/applets/sh/sh.hpp +++ b/src/applets/sh/sh.hpp @@ -155,8 +155,7 @@ class ShellState { // Control flow flags bool should_exit = false; int exit_status = 0; - bool break_loop = false; - int break_count = 0; + int break_depth = 0; // break N: counts enclosing loops to exit bool continue_loop = false; bool return_pending = false; int return_status = 0; diff --git a/src/applets/sh/sh_builtins.cpp b/src/applets/sh/sh_builtins.cpp index 07c3672..f490feb 100644 --- a/src/applets/sh/sh_builtins.cpp +++ b/src/applets/sh/sh_builtins.cpp @@ -237,6 +237,18 @@ static int builtin_local(std::vector& args, ShellState& state) { return 0; } +static int builtin_break(std::vector& args, ShellState& state) { + int n = (args.size() > 1) ? 
std::atoi(args[1].c_str()) : 1; + if (n < 1) n = 1; + state.break_depth = n; + return 0; +} + +static int builtin_continue(std::vector& /*args*/, ShellState& state) { + state.continue_loop = true; + return 0; +} + auto get_builtins() -> const std::unordered_map& { static const std::unordered_map builtins = { {"echo", builtin_echo}, @@ -256,6 +268,8 @@ auto get_builtins() -> const std::unordered_map& { {".", builtin_source}, {"return", builtin_return}, {"local", builtin_local}, + {"break", builtin_break}, + {"continue", builtin_continue}, }; return builtins; } diff --git a/src/applets/sh/sh_executor.cpp b/src/applets/sh/sh_executor.cpp index b1a7b11..69f8493 100644 --- a/src/applets/sh/sh_executor.cpp +++ b/src/applets/sh/sh_executor.cpp @@ -309,7 +309,7 @@ auto execute_command(Command& cmd, ShellState& state) -> int { if (state.should_exit) break; if (state.return_pending) break; - if (state.break_loop) { state.break_loop = false; break; } + if (state.break_depth > 0) { --state.break_depth; break; } if (state.continue_loop) { state.continue_loop = false; continue; } } return rc; @@ -328,7 +328,7 @@ auto execute_command(Command& cmd, ShellState& state) -> int { } if (state.should_exit) break; if (state.return_pending) break; - if (state.break_loop) { state.break_loop = false; break; } + if (state.break_depth > 0) { --state.break_depth; break; } if (state.continue_loop) { state.continue_loop = false; continue; } } return rc; @@ -387,6 +387,8 @@ auto execute(AndOr& node, ShellState& state) -> int { if (state.should_exit) break; if (state.return_pending) break; + if (state.break_depth > 0) break; + if (state.continue_loop) break; } return last_rc; diff --git a/src/applets/sh/sh_parser.cpp b/src/applets/sh/sh_parser.cpp index b472cee..00b5295 100644 --- a/src/applets/sh/sh_parser.cpp +++ b/src/applets/sh/sh_parser.cpp @@ -70,16 +70,11 @@ auto Parser::parse_compound_list() -> std::unique_ptr { auto next = parse_and_or(); if (next) { auto merged = std::make_unique(); - // 
Merge: append entries from result, then from next - for (auto& e : result->entries) { - merged->entries.push_back(std::move(e)); - } - for (auto& e : next->entries) { - merged->entries.back().first = AndOr::Op::Semi; // chain with semi - merged->entries.push_back(std::move(e)); - } - // Actually simpler: just extend entries - // The first entry of 'next' chains after the last of 'result' + // Concatenate entries; the first entry of `next` is (Semi, …), which + // naturally chains the two lists. Do NOT rewrite the op of result's + // last entry — that would corrupt a trailing && / || relationship. + for (auto& e : result->entries) merged->entries.push_back(std::move(e)); + for (auto& e : next->entries) merged->entries.push_back(std::move(e)); result = std::move(merged); } } diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index 9a17441..d9da981 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -195,6 +195,20 @@ out=$($SH -c 'echo ${MISS:-fallback}') assert_output "fallback" "$out" ((++pass)) +# ── break / continue / break N ────────────────────────────────── +out=$($SH -c 'for i in 1 2 3; do [ $i = 2 ] && break; echo $i; done') +assert_output "1" "$out" +((++pass)) + +out=$($SH -c 'for i in 1 2 3; do [ $i = 2 ] && continue; echo $i; done') +expected=$'1\n3' +assert_output "$expected" "$out" +((++pass)) + +out=$($SH -c 'for i in 1 2; do for j in a b; do [ $j = b ] && break 2; echo $j; done; echo X; done') +assert_output "a" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From 33bc74f8a4b97772ce862888c14fab391f3392ca Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 14:28:01 +0800 Subject: [PATCH 18/21] feat(sh): read -p prompt and -r raw, fix last-variable IFS trimming - read -p PROMPT writes the prompt to stderr before reading - read -r recognized (raw mode; backslashes 
already preserved) - fix the last variable receiving a leading IFS char ('echo a b c | read x y z' now yields z='c', not ' c') - integration +3: single/multi-var read, -p prompt --- src/applets/sh/sh_builtins.cpp | 38 +++++++++++++++++++++++++--------- tests/integration/test_sh.sh | 13 ++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/applets/sh/sh_builtins.cpp b/src/applets/sh/sh_builtins.cpp index f490feb..5a85159 100644 --- a/src/applets/sh/sh_builtins.cpp +++ b/src/applets/sh/sh_builtins.cpp @@ -141,32 +141,50 @@ static int builtin_shift(std::vector& args, ShellState& state) { } static int builtin_read(std::vector& args, ShellState& state) { + std::string prompt; + // Options: -r (raw, keep backslashes — we already do), -p PROMPT. + std::size_t i = 1; + while (i < args.size() && args[i].size() >= 2 && args[i][0] == '-' && args[i] != "--") { + if (args[i] == "-r") { + ++i; + } else if (args[i] == "-p") { + ++i; + if (i < args.size()) { prompt = args[i]; ++i; } + } else if (args[i].size() > 2 && args[i][1] == 'p') { + prompt = args[i].substr(2); + ++i; + } else { + break; + } + } + if (i < args.size() && args[i] == "--") ++i; + + if (!prompt.empty()) std::fputs(prompt.c_str(), stderr); + std::string line; if (!std::getline(std::cin, line)) return 1; - if (args.size() <= 1) { + std::vector names(args.begin() + static_cast(i), args.end()); + if (names.empty()) { state.set_var("REPLY", line); return 0; } - // Split line by IFS into variables + // Split line by IFS into the named variables; the last gets the remainder. 
std::string ifs = state.get_var("IFS"); if (ifs.empty()) ifs = " \t\n"; std::size_t pos = 0; - for (std::size_t i = 1; i < args.size(); ++i) { - if (i == args.size() - 1) { - // Last variable gets the rest - state.set_var(args[i], line.substr(pos)); + for (std::size_t k = 0; k < names.size(); ++k) { + if (k == names.size() - 1) { + while (pos < line.size() && ifs.find(line[pos]) != std::string::npos) ++pos; + state.set_var(names[k], line.substr(pos)); break; } - - // Skip leading IFS while (pos < line.size() && ifs.find(line[pos]) != std::string::npos) ++pos; std::size_t end = pos; while (end < line.size() && ifs.find(line[end]) == std::string::npos) ++end; - - state.set_var(args[i], line.substr(pos, end - pos)); + state.set_var(names[k], line.substr(pos, end - pos)); pos = end; } return 0; diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index d9da981..cb73410 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -209,6 +209,19 @@ out=$($SH -c 'for i in 1 2; do for j in a b; do [ $j = b ] && break 2; echo $j; assert_output "a" "$out" ((++pass)) +# ── read (with -p prompt) ─────────────────────────────────────── +out=$(echo hello | $SH -c 'read x; echo got=$x') +assert_output "got=hello" "$out" +((++pass)) + +out=$(echo a b c | $SH -c 'read x y z; echo "$x|$y|$z"') +assert_output "a|b|c" "$out" +((++pass)) + +out=$(echo bob | $SH -c 'read -p "N: " x; echo hi=$x' 2>&1) +assert_output "N: hi=bob" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From bf87627269bfb1ca6dd7f91002cf101f8dd0da7f Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 14:29:12 +0800 Subject: [PATCH 19/21] docs(plan): record sh deepen mid (here-doc/advanced param/break/read) --- document/ai/PLAN.md | 4 +-- document/notes/2026-06-28-sh-deepen-mid.md | 38 ++++++++++++++++++++++ 2 files changed, 40 
insertions(+), 2 deletions(-) create mode 100644 document/notes/2026-06-28-sh-deepen-mid.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 72ee664..03c2ebe 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批5c `sh` 深化续:here-doc + read增强/break N/高级`${}`/trap(算术/case/函数已完成)。 +> 焦点 → Phase 2 批5c `sh` 深化尾:trap(信号处理,下回合 fresh context);算术/case/函数/here-doc/高级`${}`/break N/read 增强已全收。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -25,7 +25,7 @@ | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ✅ | 6dfe329 | 424/0 | | 批5a | `grep -A/-B/-C` 上下文(ring 向前 + after_pending 向后 + 组间 `--`) | ✅ | b6920c3 | 431/0 | | 批5b | `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降) | ✅ | 3e29feb | 436/0 | -| 批5c | `sh` 深化 — ✅算术 `$((expr))`/case/函数+return+local(40d48dc·181eaa9·bfcc71e);⏳here-doc/read增强/break N/高级`${}`/trap | 🔄 | — | 436/41 | +| 批5c | `sh` 深化 — ✅算术/case/函数+return+local/here-doc/高级`${}`/break N/read增强(40d48dc·181eaa9·bfcc71e·e07de18·f23c2f1·4e5814f·33bc74f);⏳trap | 🔄 | — | 436/55 | > 各批细节(触及文件、Result 签名草案、完成门、gotcha)由 `/next <批>` 现场产出脚手架,确认后写入本表 commit/测试列。 diff --git a/document/notes/2026-06-28-sh-deepen-mid.md b/document/notes/2026-06-28-sh-deepen-mid.md new file mode 100644 index 0000000..09bd46c --- /dev/null +++ b/document/notes/2026-06-28-sh-deepen-mid.md @@ -0,0 +1,38 @@ +# 2026-06-28 — sh 深化中频(here-doc/高级${}/break N/read)(Phase 2 批5c 第 4-7 子项) + +承接 [sh 深化 P1](2026-06-28-sh-deepen-p1.md)。本批收 here-doc + 高级参数展开 + 
break N + read 增强。 + +## 子项与设计决策 + +### 4. here-doc `<<` / `<<-`(e07de18) +- lexer 读 `< Date: Sun, 28 Jun 2026 18:52:04 +0800 Subject: [PATCH 20/21] feat(sh): trap for EXIT and signals (INT/TERM/HUP/QUIT) - ShellState holds a sig->command trap table; builtin_trap parses names (EXIT/INT/TERM/.../numeric), installs a handler that records the pending signal, or clears with 'trap - SIG' - the AndOr executor runs the trap command at the next safe point; run_string and run_interactive run the EXIT trap before returning - make every waitpid retry on EINTR so a signal-fired trap doesn't corrupt child-status retrieval - integration +2: EXIT trap fires on exit, 'trap - EXIT' clears it --- src/applets/sh/sh.hpp | 14 +++++++++ src/applets/sh/sh_builtins.cpp | 52 ++++++++++++++++++++++++++++++++++ src/applets/sh/sh_executor.cpp | 27 +++++++++++++----- src/applets/sh/sh_main.cpp | 17 +++++++++-- tests/integration/test_sh.sh | 10 +++++++ 5 files changed, 110 insertions(+), 10 deletions(-) diff --git a/src/applets/sh/sh.hpp b/src/applets/sh/sh.hpp index f9fdde1..ca8b0df 100644 --- a/src/applets/sh/sh.hpp +++ b/src/applets/sh/sh.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -152,6 +153,15 @@ class ShellState { auto set_local(const std::string& name, const std::string& value) -> void; [[nodiscard]] auto in_function() const -> bool { return !local_scopes_.empty(); } + // Signal / EXIT traps + auto set_trap(int sig, const std::string& cmd) -> void { traps_[sig] = cmd; } + [[nodiscard]] auto get_trap(int sig) const -> std::string { + auto it = traps_.find(sig); + return it != traps_.end() ? 
it->second : std::string{}; + } + auto clear_trap(int sig) -> void { traps_.erase(sig); } + [[nodiscard]] auto all_traps() const -> const std::unordered_map& { return traps_; } + // Control flow flags bool should_exit = false; int exit_status = 0; @@ -168,8 +178,12 @@ class ShellState { std::string script_name_; std::unordered_map> functions_; std::vector> local_scopes_; + std::unordered_map traps_; }; +// Set by the signal handler, consumed by the executor to run the trap command. +inline volatile std::sig_atomic_t trap_pending_signal{0}; + // ── Lexer ──────────────────────────────────────────────────────── class Lexer { public: diff --git a/src/applets/sh/sh_builtins.cpp b/src/applets/sh/sh_builtins.cpp index 5a85159..179119e 100644 --- a/src/applets/sh/sh_builtins.cpp +++ b/src/applets/sh/sh_builtins.cpp @@ -1,5 +1,6 @@ #include "sh.hpp" +#include #include #include #include @@ -267,6 +268,56 @@ static int builtin_continue(std::vector& /*args*/, ShellState& stat return 0; } +// Installed for any signal with a trap; just records which signal fired. The +// executor runs the corresponding trap command at the next safe point. 
+static void trap_handler(int sig) { + cfbox::sh::trap_pending_signal = sig; +} + +static auto signal_from_name(const std::string& name) -> int { + if (name == "EXIT" || name == "0") return 0; + if (name == "INT" || name == "SIGINT") return SIGINT; + if (name == "TERM" || name == "SIGTERM") return SIGTERM; + if (name == "HUP" || name == "SIGHUP") return SIGHUP; + if (name == "QUIT" || name == "SIGQUIT") return SIGQUIT; + char* end = nullptr; + long n = std::strtol(name.c_str(), &end, 10); + if (*end == '\0' && n >= 0) return static_cast(n); + return -1; +} + +static int builtin_trap(std::vector& args, ShellState& state) { + if (args.size() == 1) { + for (const auto& [sig, cmd] : state.all_traps()) { + std::printf("trap -- '%s' %d\n", cmd.c_str(), sig); + } + return 0; + } + + std::size_t i = 1; + std::string cmd; + bool clear = false; + if (args[i] == "-") { clear = true; ++i; } + else if (args[i] == "-l") { return 0; } // list signal names: not implemented + else { cmd = args[i]; ++i; } + + for (; i < args.size(); ++i) { + int sig = signal_from_name(args[i]); + if (sig < 0) { + CFBOX_ERR("sh", "trap: %s: bad signal", args[i].c_str()); + return 1; + } + if (clear || cmd.empty()) { + state.clear_trap(sig); + if (sig != 0) ::signal(sig, SIG_DFL); + } else { + state.set_trap(sig, cmd); + if (sig != 0) ::signal(sig, trap_handler); + } + } + return 0; +} + auto get_builtins() -> const std::unordered_map& { static const std::unordered_map builtins = { {"echo", builtin_echo}, @@ -288,6 +339,7 @@ auto get_builtins() -> const std::unordered_map& { {"local", builtin_local}, {"break", builtin_break}, {"continue", builtin_continue}, + {"trap", builtin_trap}, }; return builtins; } diff --git a/src/applets/sh/sh_executor.cpp b/src/applets/sh/sh_executor.cpp index 69f8493..8700e6f 100644 --- a/src/applets/sh/sh_executor.cpp +++ b/src/applets/sh/sh_executor.cpp @@ -1,5 +1,6 @@ #include "sh.hpp" +#include #include #include #include @@ -167,9 +168,9 @@ static auto 
execute_simple(SimpleCommand& cmd, ShellState& state) -> int { ::_Exit(127); } - // Parent: wait for child - int status; - ::waitpid(pid, &status, 0); + // Parent: wait for child (retry on EINTR so signal traps don't break it) + int status = 0; + while (::waitpid(pid, &status, 0) == -1 && errno == EINTR) {} if (WIFEXITED(status)) return WEXITSTATUS(status); if (WIFSIGNALED(status)) return 128 + WTERMSIG(status); return 1; @@ -255,8 +256,8 @@ static auto execute_pipeline(Pipeline& node, ShellState& state) -> int { // Wait for all children int last_status = 0; for (int i = 0; i < n; ++i) { - int status; - ::waitpid(pids[static_cast(i)], &status, 0); + int status = 0; + while (::waitpid(pids[static_cast(i)], &status, 0) == -1 && errno == EINTR) {} if (i == n - 1) { if (WIFEXITED(status)) last_status = WEXITSTATUS(status); else if (WIFSIGNALED(status)) last_status = 128 + WTERMSIG(status); @@ -339,8 +340,8 @@ auto execute_command(Command& cmd, ShellState& state) -> int { int rc = node->body ? execute(*node->body, state) : 0; ::_Exit(rc); } - int status; - ::waitpid(pid, &status, 0); + int status = 0; + while (::waitpid(pid, &status, 0) == -1 && errno == EINTR) {} if (WIFEXITED(status)) return WEXITSTATUS(status); if (WIFSIGNALED(status)) return 128 + WTERMSIG(status); return 1; @@ -389,6 +390,18 @@ auto execute(AndOr& node, ShellState& state) -> int { if (state.return_pending) break; if (state.break_depth > 0) break; if (state.continue_loop) break; + if (trap_pending_signal != 0) { + // Run the trap command for the signal that fired, then continue. 
+ int sig = trap_pending_signal; + trap_pending_signal = 0; + std::string tcmd = state.get_trap(sig); + if (!tcmd.empty()) { + Lexer lexer(tcmd); + Parser parser(lexer); + auto ast = parser.parse_program(); + if (ast) execute(*ast, state); + } + } } return last_rc; diff --git a/src/applets/sh/sh_main.cpp b/src/applets/sh/sh_main.cpp index eaa24a6..92238ad 100644 --- a/src/applets/sh/sh_main.cpp +++ b/src/applets/sh/sh_main.cpp @@ -23,13 +23,23 @@ constexpr cfbox::help::HelpEntry HELP = { .extra = "", }; +auto run_exit_trap(cfbox::sh::ShellState& state) -> void { + auto cmd = state.get_trap(0); + if (!cmd.empty()) { + cfbox::sh::Lexer lexer(cmd); + cfbox::sh::Parser parser(lexer); + auto ast = parser.parse_program(); + if (ast) cfbox::sh::execute(*ast, state); + } +} + auto run_string(const std::string& script, cfbox::sh::ShellState& state) -> int { cfbox::sh::Lexer lexer(script); cfbox::sh::Parser parser(lexer); auto ast = parser.parse_program(); - if (ast) - return cfbox::sh::execute(*ast, state); - return 0; + int rc = ast ? cfbox::sh::execute(*ast, state) : 0; + run_exit_trap(state); + return rc; } auto run_file(const char* path, cfbox::sh::ShellState& state) -> int { @@ -96,6 +106,7 @@ auto run_interactive(cfbox::sh::ShellState& state) -> int { } } + run_exit_trap(state); return state.should_exit ? 
state.exit_status : last_rc; } diff --git a/tests/integration/test_sh.sh b/tests/integration/test_sh.sh index cb73410..50fa09a 100755 --- a/tests/integration/test_sh.sh +++ b/tests/integration/test_sh.sh @@ -222,6 +222,16 @@ out=$(echo bob | $SH -c 'read -p "N: " x; echo hi=$x' 2>&1) assert_output "N: hi=bob" "$out" ((++pass)) +# ── trap (EXIT) ───────────────────────────────────────────────── +out=$($SH -c 'trap "echo bye" EXIT; echo main') +expected=$'main\nbye' +assert_output "$expected" "$out" +((++pass)) + +out=$($SH -c 'trap "echo bye" EXIT; trap - EXIT; echo main') +assert_output "main" "$out" +((++pass)) + # ── Subshell ───────────────────────────────────────────────────── out=$($SH -c '(echo sub1; echo sub2)') expected=$'sub1\nsub2' From c1750e25dc174e8adbfbe5d2c79d1540563b99e9 Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 28 Jun 2026 18:54:33 +0800 Subject: [PATCH 21/21] docs(plan): mark Phase 2 complete (sh trap finishes sh full-set) --- document/ai/PLAN.md | 4 ++-- document/ai/ROADMAP.md | 7 +++--- document/notes/2026-06-28-sh-deepen-trap.md | 25 +++++++++++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 document/notes/2026-06-28-sh-deepen-trap.md diff --git a/document/ai/PLAN.md b/document/ai/PLAN.md index 03c2ebe..93cc710 100644 --- a/document/ai/PLAN.md +++ b/document/ai/PLAN.md @@ -3,7 +3,7 @@ > Tier 3(批级,易变)。单一事实源(批级)。全树见 [ROADMAP.md](ROADMAP.md),铁律见 [DIRECTIVES.md](DIRECTIVES.md)。 > **Phase 1.5 代码质量审查 ✅ 完成**(体积 -14%、消 iostream/stoi、统一错误宏、fs 封装扩展,379 测试全绿)。 > **v0.3.0 已发布**:L2 rootfs 启动骨架(init/mount/mdev/umount/swapoff/reboot/poweroff,117→123 applet)+ tail -f —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox。基线 399 测试 / 418 KB / 123 applet。 -> 焦点 → Phase 2 批5c `sh` 深化尾:trap(信号处理,下回合 fresh context);算术/case/函数/here-doc/高级`${}`/break N/read 增强已全收。 +> ✅ **Phase 2 全部完成**(cp/test/ls/grep/find + sh 全收 8 项)。下一站:Phase 3 网络最小闭环。 > 状态:✅ DONE / 🔄 NEXT / ⏳ PENDING / ⛔ BLOCKED。每批≈一 commit,完成门 `cmake --build 
build -j$(nproc) && ctest --test-dir build --output-on-failure` 全绿 + `bash tests/integration/run_all.sh`。 ## ✅ Phase 1.5(代码质量审查)已完成 — 2026-05-26 @@ -25,7 +25,7 @@ | 批4 | `ls -R` 递归 + `--color`(LS_COLORS 感知、递归缩进) | ✅ | 6dfe329 | 424/0 | | 批5a | `grep -A/-B/-C` 上下文(ring 向前 + after_pending 向后 + 组间 `--`) | ✅ | b6920c3 | 431/0 | | 批5b | `find` 布尔表达式(AST + -a/-o/-not/!/括号 + 递归下降) | ✅ | 3e29feb | 436/0 | -| 批5c | `sh` 深化 — ✅算术/case/函数+return+local/here-doc/高级`${}`/break N/read增强(40d48dc·181eaa9·bfcc71e·e07de18·f23c2f1·4e5814f·33bc74f);⏳trap | 🔄 | — | 436/55 | +| 批5c | `sh` 深化(算术/case/函数+return+local/here-doc/高级`${}`/break N/read/trap 全收) | ✅ | 46c3657 | 436/57 | > 各批细节(触及文件、Result 签名草案、完成门、gotcha)由 `/next <批>` 现场产出脚手架,确认后写入本表 commit/测试列。 diff --git a/document/ai/ROADMAP.md b/document/ai/ROADMAP.md index 171a843..eaa71d2 100644 --- a/document/ai/ROADMAP.md +++ b/document/ai/ROADMAP.md @@ -10,7 +10,7 @@ | Phase 0 | ✅(lite,并行收尾) | [phase-0a 基线盘点](../todo/phases/phase-0a-baseline-inventory.md) | 文档漂移修复、differential test 骨架、编译零 warning;与 Phase 1 并行 | | Phase 1 | ✅ | [核心系统](../todo/phases/phase-1-core-system.md) | P0 系统命令(chmod/chown/dd/mount/stty 等 24 个新 applet) | | Phase 1.5 | ✅ | [代码质量审查](../todo/phases/phase-1.5-code-quality-review.md) | 错误处理一致性、风格、测试覆盖、体积检查(A-G 扫描全过) | -| **Phase 2** | 🔄 **当前焦点** | [核心深化(同 Phase 1 文档 Part 3)](../todo/phases/phase-1-core-system.md) | tail -f、cp -a、test POSIX、ls -R/--color、grep -A/-B/-C、find 布尔、sh 深化 | +| **Phase 2** | ✅ | [核心深化(同 Phase 1 文档 Part 3)](../todo/phases/phase-1-core-system.md) | tail -f、cp -a、test POSIX、ls -R/--color、grep -A/-B/-C、find 布尔、sh 深化(全完成) | | Phase 3 | ⏳ | [网络最小闭环](../todo/phases/phase-2-network.md) | 基础网络配置、诊断、下载、连接调试 | | Phase 4 | ⏳ | [生产质量门禁深化](../todo/phases/phase-3-quality.md) | fuzzing、benchmark、POSIX 子集、release 工程 | | Phase 5 | ⏳ | [多用户与嵌入式运行时](../todo/phases/phase-4-multiuser.md) | login/getty/syslog/mdev/storage | @@ -22,9 +22,8 @@ - 兼容性裁决见 [compatibility-policy.md](../todo/compatibility-policy.md);v1.0 验收边界见 
[v1-production-criteria.md](../todo/v1-production-criteria.md)。 ## 当前焦点 -**Phase 2 核心命令深化** 🔄(批级进度见 [PLAN.md](PLAN.md))。基线 399 GTest + 54 集成脚本,123 applet,418 KB(size-opt)。 -> **v0.3.0 已发布**:L2 rootfs 启动骨架(`init` askfirst / `mount` / `mdev` / `umount` / `swapoff` / `reboot`·`poweroff`,117→123 applet)+ `tail -f` —— cfbox 在 i.MX6ULL 上作为 PID 1 替代 BusyBox,端到端实测。详见 [changelogs/v0.3.0.md](../../changelogs/v0.3.0.md)。 -> 焦点回到 Phase 2 核心深化:批2 `cp -a`(归档模式)→ `test` POSIX → `ls -R`/`--color`。 +**Phase 2 核心命令深化 ✅ 全部完成**(2026-06-28)。批2-5c:`cp -a`、`test` POSIX、`ls -R`/`--color`、`grep -A/-B/-C`、`find` 布尔、`sh` 全收(算术/case/函数/here-doc/高级`${}`/break N/read/trap)。基线 **436 GTest + 57 集成(sh)/ 438 KB** size-opt(v0.3.0 基线 399/418)。批级记录见 [PLAN.md](PLAN.md) 与 [notes/](../notes/)。 +> **下一站**:Phase 3 网络最小闭环(基础网络配置/诊断/下载)。 ## 当前焦点之后下一个可启动的 **Phase 3 网络最小闭环**(基础网络配置/诊断/下载)——Phase 2 核心命令深度到位后启动。更远:Phase 4 质量门禁(fuzzing/release 工程)→ Phase 5 多用户 → Phase 6 长尾。 diff --git a/document/notes/2026-06-28-sh-deepen-trap.md b/document/notes/2026-06-28-sh-deepen-trap.md new file mode 100644 index 0000000..ebd51db --- /dev/null +++ b/document/notes/2026-06-28-sh-deepen-trap.md @@ -0,0 +1,25 @@ +# 2026-06-28 — sh trap(Phase 2 批5c 第 8 子项,sh 全收收尾) + +承接 [sh 中频](2026-06-28-sh-deepen-mid.md)。本项收 trap(信号/EXIT),**sh 全收 8/8 完成,Phase 2 全部完成**。 + +## 设计决策 +- **ShellState `traps_` 表**(int sig → cmd)+ `trap_pending_signal` 全局(`inline volatile std::sig_atomic_t`,sh.hpp)。 +- **builtin_trap**:解析信号名(EXIT/INT/TERM/HUP/QUIT/数字),存 `trap[sig]=cmd`;非 EXIT 信号注册 `trap_handler`(只设 flag,async-signal-safe);`trap - SIG` 清除 + `SIG_DFL`;空 cmd 重置默认。 +- **executor 消费**:execute(AndOr) 每 entry 后检查 `trap_pending_signal`,查 `state.get_trap(sig)`,非空则 Lexer/Parser/execute 跑 trap cmd。 +- **EXIT trap**:`run_string` / `run_interactive` 退出前 `run_exit_trap`(`get_trap(0)`)。 +- **waitpid EINTR 重试**(execute_simple/pipeline/subshell 3 处):signal 中断 waitpid 返回 EINTR,重试避免 status 未设 UB + 僵尸子进程。这是 trap 信号处理的必要 robustness。 + +## 验证 +- 集成 test_sh.sh:55 → 57(+2 
trap EXIT + 清除),全绿。 +- ctest 436/436。 +- size-opt **438 KB**(持平)。 + +## 陷阱 +- trap handler 只设 flag(async-signal-safe),实际 trap cmd 在 executor 安全校验点跑(非信号上下文)。 +- SIGINT/SIGTERM 集成测试需 kill + timing,本批测 EXIT(可靠);SIGINT 注册了 handler 但集成未覆盖(信号经 `trap_handler` 置 flag、再由 executor 消费的路径与 EXIT 的 `run_exit_trap` 不同,尚未被测试)。 +- `trap -l`(列信号名)no-op;`trap`(无参)列已设 trap。 +- fork child execvp 自动重置 handler 到 SIG_DFL,child 不继承 sh trap handler。 +- commit: `46c3657` + +## Phase 2 全部完成 ✅ +sh 全收 8/8:**算术 / case / 函数+return+local / here-doc / 高级${} / break N+continue / read 增强 / trap**。Phase 2(批2-5c)全部 ✅。基线 436 GTest / 438 KB size-opt(v0.3.0 基线 399/418)。