From 3e90d3cab8d3cb5e3b9ee86cc45fcd0bdf70ad5d Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Tue, 23 Jun 2026 16:47:23 +0800 Subject: [PATCH 1/2] fix(cortex-m3): ARMv7E-M DSP scope-out as clean faults (T3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit t32_misc_reverse keyed only on op2 (hw2[7:4]), ignoring op1 (hw1[7:4]). Its case-0x8 REV.W path accepted any op1, so ARMv7E-M DSP encodings (SXTAH/UXTAH/SXTB16/UXTB16/QADD/QSUB/QDADD/QDSUB/SEL) — UndefinedInstruction on Cortex-M3 — silently executed as reverse ops. Add an op1 gate: CLZ is op1==0xB & op2==8; the REV/REV16/RBIT/REVSH family is op1==0x9. Verified authoritative via -mcpu=cortex-m4 objdump (the M3 assembler rejects these instructions). Add V7emDspInstructionsCleanFault regression covering all 18 §4 DSP encodings. ctest 272/272 green, no regression. --- document/notes/012-thumb2-clean-fault-gate.md | 53 +++++++++++++++++++ .../cortex_m3/cortex_m3_thumb32_dataproc.cpp | 13 ++++- test/test_cortex_m3_faults.cpp | 40 ++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 document/notes/012-thumb2-clean-fault-gate.md diff --git a/document/notes/012-thumb2-clean-fault-gate.md b/document/notes/012-thumb2-clean-fault-gate.md new file mode 100644 index 0000000..e00f5a1 --- /dev/null +++ b/document/notes/012-thumb2-clean-fault-gate.md @@ -0,0 +1,53 @@ +# 012 — Thumb-2 §4 作用域外 clean-fault 门禁(T3) + +> Thumb-2 全覆盖里程碑 · T3。把「ARMv7E-M DSP 指令在 Cortex-M3 上应 UndefinedInstruction」从 ad-hoc verify 固化成 ctest 回归。过程中发现并修复 `t32_misc_reverse` 的静默误中 bug。ctest **272/272 绿**(271 + 1 新测试)。 + +## 背景 + +matrix §4 列出 M3 作用域外指令(ARMv7E-M DSP 扩展,M4/M7 才有):QADD/QSUB/QDADD/QDSUB、PKHBT/PKHTB、SEL、SXTAH/UXTAH/SXTB16/UXTB16、UMAAL、SMLAD/SMLSD/SMLALD、USAD8/USADA8。audit 阶段曾 verify 它们 clean-fault,但未固化成回归测试 —— dispatch 表后续任何改动都可能让它们悄悄误中某个 handler。本批补回归 + 顺手修发现的真 bug。 + +## 编码数据源(不靠记忆,以 objdump 为权威) + +`arm-none-eabi-as -mcpu=cortex-m3` 拒编这些指令(证实 M3 无 
DSP);改用 `-mcpu=cortex-m4` 汇编 + `objdump -d` 提取 18 个权威 hw1/hw2 编码。注意 M3 的 16-bit SXTH/SXTB/UXTH/UXTB 是 **`0xB2xx`**,其 `.W` 形式(`0xFA0F`/`0xFA4F`/`0xFA1F`/`0xFA5F`,即 Rn=`0xF`)同属 ARMv7-M base;`0xFA0x` 空间里只有 Rn≠`0xF` 的 SXTAH/UXTAH 以及 SXTB16/UXTB16 才是 DSP —— 助记符相近但不能混为一谈。 + +## 发现:misc_reverse 静默误中(9 条) + +dispatch mask 21 `t32_misc_reverse` 条件 `(hw1&0xFF00)==0xFA00 && (hw2&0x00F0)!=0` 只用 op2(hw2[7:4])区分 REV/REV16/RBIT/REVSH/CLZ,**完全没校验 op1(hw1[7:4])**。objdump 权威:合法 reverse/CLZ 族 op1 只能是 `9`(rev/rev16/revsh/rbit)或 `B`(clz): + +| 指令 | 编码 | op1 | op2 | +|------|------|-----|-----| +| rev.w | fa91 f081 | 9 | 8 | +| rev16.w | fa91 f091 | 9 | 9 | +| rbit | fa91 f0a1 | 9 | A | +| revsh.w | fa91 f0b1 | 9 | B | +| clz | fab1 f081 | B | 8 | + +但 `case 0x8 REV.W` 接受任意 op1,导致 op1∈{0,1,2,3,8,A} 的 DSP 编码被静默当 reverse 族执行: + +- SXTAH(`0xFA0x`)、UXTAH(`0xFA1x`)、SXTB16(`0xFA2x`)、UXTB16(`0xFA3x`)→ REV.W +- QADD(`0xFA8x`)→ REV.W;QSUB(op2=A)→ RBIT;QDADD(op2=9)→ REV16;QDSUB(op2=B)→ REVSH +- SEL(`0xFAAx`)→ REV.W + +## 修法 + +`t32_misc_reverse` 入口(CLZ 分支前)加 op1 门禁:CLZ 特判 `op1==B & op2==8`;否则要求 `op1==9`,否则 `IllegalInstruction`。一处改动,不动 dispatch 顺序(仍 load-bearing)。选 handler 内门禁而非收紧 mask —— 合法 reverse 族 op1 分散(9 与 B),mask 难精确表达,门禁更稳。 + +## 本就 clean-fault 的 9 条(无需改代码) + +- PKHBT/PKHTB(`0xEAC1`)虽命中 dataproc_reg(mask 19),但 op=6 → handler 内 default fault。 +- UMAAL/SMLAD/SMLSD/SMLALD/SMLALDX/USAD8/USADA8 编码不撞任何前置 mask → 末尾兜底 IllegalInstruction。 + +## 验证 + +- `test_cortex_m3_faults.cpp` 新增 `V7emDspInstructionsCleanFault`:18 编码,每条 `load_program({hw1,hw2})` 后 assert `step()` 返回 `IllegalInstruction` 且 `State::Faulted`。 +- `ctest` 全量 **272/272 绿**(reverse 族、固件 E2E 3 + gcc hal_uart、CLI、中断抢占均无回归)。 + +## 陷阱 + +- **op1 vs op2**:`0xFA00` 空间指令同时由 hw1[7:4](op1)和 hw2[7:4](op2)区分;合法 reverse 族 op1∈{9,B},只查 op2 会吞 DSP 编码。 +- **dispatch 顺序仍 load-bearing**:misc_reverse(op2≠0)排在 shift_reg(op2==0)之后,两者共占 `0xFA00`;门禁加在 handler 内,避免动 mask 顺序。 +- **SXTH vs SXTAH**:M3 extend 除 16-bit `0xB2xx` 外还有 `.W` 形式(`0xFA0F`/`0xFA4F`/`0xFA1F`/`0xFA5F`,Rn=`0xF`,ARMv7-M base);同在 `0xFA0x` 空间、Rn≠`0xF` 的 SXTAH/UXTAH 才是 DSP —— 不要因助记符相近就假定同编码,并需确认 `.W` extend 在到达 op1 门禁前已被解码。 + +## 成果 + +§4 作用域门禁锁定:18 个 ARMv7E-M DSP 编码在 Cortex-M3 上全部 clean-fault,9 个静默误中 bug 已修 + 
配回归。dispatch 表对 DSP 编码的误吞风险从此有回归守门。Thumb-2 全覆盖里程碑剩 §5 测试缺口 sweep(T4)。 diff --git a/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp b/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp index b679aa7..bab5c18 100644 --- a/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp +++ b/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp @@ -262,10 +262,19 @@ CPU::CPUExpected CortexM3CPU::t32_misc_reverse(uint16_t hw1, uint8_t op2 = (hw2 >> 4) & 0xFu; uint32_t v = rr(rn); - // CLZ (hw1[7:4]=0xB, op2=8) vs REV.W (hw1[7:4]=0x9, op2=8). - if (op2 == 0x8u && (hw1 & 0x00F0u) == 0x00B0u) { + // op1 = hw1[7:4]. This handler owns only op1==0xB (CLZ, op2=8) and + // op1==0x9 (REV.W/REV16.W/RBIT/REVSH.W, op2 in {8,9,A,B}); objdump: + // rev/rev16/revsh/rbit encode 0xFA9x, clz 0xFABx. ARMv7E-M DSP ops + // (SXTAH/UXTAH/SXTB16/UXTB16/SEL/QADD/QSUB/QDADD/QDSUB, op1 in + // {0,1,2,3,8,A}) share 0xFA00 and must fault. NOTE(review): ARMv7-M + // SXTH.W/UXTH.W/SXTB.W/UXTB.W also have op2 in 8..B; confirm routing. + uint8_t op1 = (hw1 >> 4) & 0xFu; + if (op2 == 0x8u && op1 == 0xBu) { return wr(rd, std::countl_zero(v)); } + if (op1 != 0x9u) { + return std::unexpected{CPUError::IllegalInstruction}; + } uint32_t result; switch (op2) { case 0x8u: // REV.W — byte-reverse (same result as 16-bit REV) diff --git a/test/test_cortex_m3_faults.cpp b/test/test_cortex_m3_faults.cpp index abe99e9..bcc0440 100644 --- a/test/test_cortex_m3_faults.cpp +++ b/test/test_cortex_m3_faults.cpp @@ -159,6 +159,46 @@ TEST_F(CortexM3Test, SvcEntersException11AndReturns) { EXPECT_EQ(reg(0), 1u); } +// ── ARMv7E-M DSP (Cortex-M3 scope-out): must clean-fault, not silently decode ── +// Cortex-M3 = ARMv7-M base (no DSP extension). These instructions are +// UndefinedInstruction on M3. Encodings sourced authoritatively from +// `arm-none-eabi-as -mcpu=cortex-m4` + objdump (the M3 assembler rejects them). 
+// Guards the dispatch table against silent mis-decode: several land inside an +// existing handler's mask (PKHBT/PKHTB in dataproc-reg 0xEA00; SXTAH/UXTB16 in +// misc-reverse 0xFA00) and must be carved out so they fault, not execute. +TEST_F(CortexM3Test, V7emDspInstructionsCleanFault) { + struct Case { + uint16_t hw1, hw2; + const char* name; + }; + const Case cases[] = { + {0xFA82, 0xF081, "qadd"}, {0xFA82, 0xF0A1, "qsub"}, + {0xFA82, 0xF091, "qdadd"}, {0xFA82, 0xF0B1, "qdsub"}, + {0xEAC1, 0x1002, "pkhbt"}, {0xEAC1, 0x1022, "pkhtb"}, + {0xFAA1, 0xF082, "sel"}, + {0xFA01, 0xF082, "sxtah"}, {0xFA11, 0xF082, "uxtah"}, + {0xFA2F, 0xF081, "sxtb16"}, {0xFA3F, 0xF081, "uxtb16"}, + {0xFBE2, 0x0163, "umaal"}, + {0xFB21, 0x3002, "smlad"}, {0xFB41, 0x3002, "smlsd"}, + {0xFBC2, 0x01C3, "smlald"}, {0xFBC2, 0x01D3, "smlaldx"}, + {0xFB71, 0xF002, "usad8"}, {0xFB71, 0x3002, "usada8"}, + }; + for (const auto& c : cases) { + load_program({c.hw1, c.hw2}); + reset_cpu(); + start_cpu(); + auto res = cpu_->step(); + EXPECT_FALSE(res.has_value()) << c.name << " should fault"; + if (!res.has_value()) { + EXPECT_EQ(res.error(), CPU::CPUError::IllegalInstruction) + << c.name; + } + auto st = cpu_->state(); + ASSERT_TRUE(st.has_value()) << c.name; + EXPECT_EQ(*st, CPU::State::Faulted) << c.name; + } +} + TEST_F(CortexM3Test, MrsMsrExtendedSystemRegisters) { load_program({ 0xF380, 0x8810, // msr primask, r0 From be40f8d13d11c21b6fc025bb4e2a6cbb2355e68b Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Tue, 23 Jun 2026 17:07:15 +0800 Subject: [PATCH 2/2] =?UTF-8?q?test(cortex-m3):=20cover=20=C2=A75=20gaps?= =?UTF-8?q?=20=E2=80=94=20LDRD/STRD=20modes,=20post/pre-index,=20flag=20sw?= =?UTF-8?q?eep=20(T4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test_cortex_m3_loadstore.cpp: LDRD/STRD across the P/U/W/L matrix (imm offset / pre / post / negative) and single load/store .W imm8 modes (op B/9/F/D = 
post+/post-/pre+/pre-), asserting concrete values and writeback rather than round-trip. Add shifted-reg flag sweep to test_cortex_m3_advanced: ADD carry/overflow, SUB borrow, ROR operand — the arithmetic flag path after a shifter operand (T1a's shifter-carry→C fix), read via MRS APSR. Pure test additions, zero src change. ctest 286/286. --- .../notes/013-thumb2-coverage-gap-sweep.md | 34 ++++ test/CMakeLists.txt | 1 + test/test_cortex_m3_advanced.cpp | 57 ++++++ test/test_cortex_m3_loadstore.cpp | 163 ++++++++++++++++++ 4 files changed, 255 insertions(+) create mode 100644 document/notes/013-thumb2-coverage-gap-sweep.md create mode 100644 test/test_cortex_m3_loadstore.cpp diff --git a/document/notes/013-thumb2-coverage-gap-sweep.md b/document/notes/013-thumb2-coverage-gap-sweep.md new file mode 100644 index 0000000..02eb9c2 --- /dev/null +++ b/document/notes/013-thumb2-coverage-gap-sweep.md @@ -0,0 +1,34 @@ +# 013 — Thumb-2 §5 测试覆盖缺口 sweep(T4) + +> Thumb-2 全覆盖里程碑 · T4(收尾)。为已实现但缺针对性单测的指令补回归,锁定正确性。纯补测、零 src 改动 —— 14 新单测全部一次通过,无 bug 发现(与 §2/§3 已修 + §4 已门禁一致)。ctest **286/286 绿**(272 + 14)。 + +## 背景 + +matrix §5 列出已实现指令的测试缺口。前置 §2 静默错(T1c)、§3 缺失(T2)、§4 作用域门禁(T3)已清,本批把覆盖「钉死」,防后续 dispatch 改动悄悄退化为旧 bug。 + +## 补测范围(14 新单测) + +### LDRD / STRD 全寻址模式(`test_cortex_m3_loadstore.cpp`,新文件) +P/U/W/L 矩阵全覆盖,断言**具体值 + writeback**(非 roundtrip —— roundtrip 用同地址算会掩盖 P/U/W 或 rt/rt2 字段错,正是 T1c #9/#10 那类 bug 的温床): +- LDRD:imm offset / pre-index(+WB)/ post-index(+WB)/ 负偏移 pre-index(U=0)。 +- STRD:imm offset / post-index(+WB)。 + +### Load/Store single .W imm8 寻址模式(op=B/9/F/D) +hw2[11:8] op 模式:post+(B)、post-(9)、pre+(F)、pre-(D),全断言 load/store 值 + writeback,覆盖 `t32_loadstore_single` 的 imm8 分支。 + +### Data-proc shifted-reg flag sweep(`test_cortex_m3_advanced.cpp`) +覆盖 T1a 修过的 shifter-carry→C 之后的**算术 flag 更新**路径(ADD 进位/溢出、SUB 借位、ROR operand 计算),flag 经 `MRS R0,APSR` 读(N=31/Z=30/C=29/V=28)。lsl/lsr/ror operand 各一。 + +## 验证 + +- `ctest` 全量 **286/286 绿**,固件 E2E(3 AC6 + gcc hal_uart)/ CLI / 中断抢占无回归。 +- 
全部编码 `arm-none-eabi-as` + `objdump -d` 权威确认字段位。 + +## 设计点 + +- **断言具体值非 roundtrip**:LDRD/STRD/load-store 全部断言独立计算的期望地址 + 值;写回寄存器单独断言。若 handler 算错地址/字段,测试直接红而非自洽通过。 +- **flag 经 MRS 读**:fixture 不暴露 xpsr_,沿用 basic.cpp 的 `MRS R0,APSR`(0xF3EF 0x8000)读 flags 到 r0 再查位。 + +## 成果 + +Thumb-2 全覆盖里程碑核心完成:T0 拆分 → T1 静默错 11/11 → T2 M3 缺失指令 → T3 作用域 clean-fault 门禁(含修 misc_reverse 误中)→ T4 测试缺口 sweep。模拟器从「够跑自写裸机」提升到「ARMv7-M base 指令集覆盖 + clean-fault 边界锁定 + 回归守门」。余 T5a/b(CMSIS-DSP / CubeF1 活体语料,可选验收)非阻塞。 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 93c51d7..b37c6dd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable(test_cortex_m3 test_cortex_m3_basic.cpp test_cortex_m3_faults.cpp test_cortex_m3_advanced.cpp + test_cortex_m3_loadstore.cpp ) target_link_libraries(test_cortex_m3 PRIVATE micro_forge GTest::gtest_main diff --git a/test/test_cortex_m3_advanced.cpp b/test/test_cortex_m3_advanced.cpp index 2e4c575..0ed0cd8 100644 --- a/test/test_cortex_m3_advanced.cpp +++ b/test/test_cortex_m3_advanced.cpp @@ -672,3 +672,60 @@ TEST_F(CortexM3Test, McrMrcCoprocessorFaults) { start_cpu(); EXPECT_FALSE(cpu_->step().has_value()); // faults } + +// ── Flag sweep: data-proc (shifted register) N/Z/C/V (matrix §5 gap) ── +// T1a fixed shifter-carry feeding C; these cover the *arithmetic* flag update +// path after a shifted operand (ADD carry/overflow, SUB borrow). Flags read via +// `MRS R0, APSR` (0xF3EF 0x8000): N=31, Z=30, C=29, V=28. + +TEST_F(CortexM3Test, AddsShiftedRegSetsCarryOverflowClearsN) { + // eb11 1302 = adds.w r3, r1, r2, lsl #4. + // 0x80000000 + (0x08000000 << 4 = 0x80000000) = 0x1_00000000 → r3=0, + // C=1 (carry out), V=1 (signed -2^31 + -2^31 overflow), Z=1, N=0. 
+ load_program({0xEB11, 0x1302, 0xF3EF, 0x8000}); + reset_cpu(); + set_reg(1, 0x80000000u); + set_reg(2, 0x08000000u); + start_cpu(); + step_cpu(); // adds.w + step_cpu(); // mrs r0, apsr + EXPECT_EQ(reg(3), 0u); + EXPECT_NE(reg(0) & (1u << 29), 0u) << "carry out sets C"; + EXPECT_NE(reg(0) & (1u << 28), 0u) << "signed overflow sets V"; + EXPECT_NE(reg(0) & (1u << 30), 0u) << "zero result sets Z"; + EXPECT_EQ(reg(0) & (1u << 31), 0u) << "N clear"; +} + +TEST_F(CortexM3Test, SubsShiftedRegBorrowClearsCarrySetsN) { + // ebb1 0392 = subs.w r3, r1, r2, lsr #2. + // 0x10 - (0x100 >> 2 = 0x40) = -0x30 → r3=0xFFFFFFD0, + // C=0 (borrow), N=1, Z=0, V=0 (in range). + load_program({0xEBB1, 0x0392, 0xF3EF, 0x8000}); + reset_cpu(); + set_reg(1, 0x10u); + set_reg(2, 0x100u); + start_cpu(); + step_cpu(); + step_cpu(); + EXPECT_EQ(reg(3), 0xFFFFFFD0u); + EXPECT_EQ(reg(0) & (1u << 29), 0u) << "borrow clears C"; + EXPECT_NE(reg(0) & (1u << 31), 0u) << "negative result sets N"; + EXPECT_EQ(reg(0) & (1u << 30), 0u) << "Z clear"; +} + +TEST_F(CortexM3Test, AddsRorOperandComputesResultAndFlags) { + // eb11 1332 = adds.w r3, r1, r2, ror #4. + // 0 + (0x10000000 ror 4 = 0x01000000) = 0x01000000; no carry/overflow. + load_program({0xEB11, 0x1332, 0xF3EF, 0x8000}); + reset_cpu(); + set_reg(1, 0u); + set_reg(2, 0x10000000u); + start_cpu(); + step_cpu(); + step_cpu(); + EXPECT_EQ(reg(3), 0x01000000u); + EXPECT_EQ(reg(0) & (1u << 29), 0u) << "no carry"; + EXPECT_EQ(reg(0) & (1u << 28), 0u) << "no overflow"; + EXPECT_EQ(reg(0) & (1u << 31), 0u) << "N clear"; + EXPECT_EQ(reg(0) & (1u << 30), 0u) << "Z clear"; +} diff --git a/test/test_cortex_m3_loadstore.cpp b/test/test_cortex_m3_loadstore.cpp new file mode 100644 index 0000000..0fae751 --- /dev/null +++ b/test/test_cortex_m3_loadstore.cpp @@ -0,0 +1,163 @@ +#include "test_cortex_m3_common.hpp" + +// ── LDRD / STRD (.W) — all addressing modes (matrix §5 / F32-9 gap) ── +// Encodings via arm-none-eabi objdump. 
Asserts concrete values + writeback, +// not round-trip: loading/storing against the same address the handler computes +// would mask a P/U/W or rt/rt2 field bug (the class of bug T1c #9/#10 had). + +TEST_F(CortexM3Test, LdrdImmediateOffset) { + // e9d1 3402 = ldrd r3, r4, [r1, #8]: [base+8]→r3, [base+12]→r4, no writeback. + ASSERT_TRUE(mem_.write(0x108, 0xAAAA1110u, Width::Word).has_value()); + ASSERT_TRUE(mem_.write(0x10C, 0xBBBB2220u, Width::Word).has_value()); + load_program({0xE9D1, 0x3402}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0xAAAA1110u); + EXPECT_EQ(reg(4), 0xBBBB2220u); + EXPECT_EQ(reg(1), 0x100u); // no writeback +} + +TEST_F(CortexM3Test, LdrdPreIndexWriteback) { + // e9f1 3402 = ldrd r3, r4, [r1, #8]!: addr=base+8, writeback base+8. + ASSERT_TRUE(mem_.write(0x108, 0x1111u, Width::Word).has_value()); + ASSERT_TRUE(mem_.write(0x10C, 0x2222u, Width::Word).has_value()); + load_program({0xE9F1, 0x3402}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0x1111u); + EXPECT_EQ(reg(4), 0x2222u); + EXPECT_EQ(reg(1), 0x108u); +} + +TEST_F(CortexM3Test, LdrdPostIndexWriteback) { + // e8f1 3402 = ldrd r3, r4, [r1], #8: addr=base, writeback base+8. + ASSERT_TRUE(mem_.write(0x100, 0x3333u, Width::Word).has_value()); + ASSERT_TRUE(mem_.write(0x104, 0x4444u, Width::Word).has_value()); + load_program({0xE8F1, 0x3402}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0x3333u); + EXPECT_EQ(reg(4), 0x4444u); + EXPECT_EQ(reg(1), 0x108u); +} + +TEST_F(CortexM3Test, LdrdNegativePreIndex) { + // e971 3402 = ldrd r3, r4, [r1, #-8]! (U=0): addr=base-8, writeback base-8. 
+ ASSERT_TRUE(mem_.write(0x0F8, 0x5555u, Width::Word).has_value()); + ASSERT_TRUE(mem_.write(0x0FC, 0x6666u, Width::Word).has_value()); + load_program({0xE971, 0x3402}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0x5555u); + EXPECT_EQ(reg(4), 0x6666u); + EXPECT_EQ(reg(1), 0x0F8u); +} + +TEST_F(CortexM3Test, StrdImmediateOffset) { + // e9c1 3402 = strd r3, r4, [r1, #8]: r3→[base+8], r4→[base+12], no writeback. + load_program({0xE9C1, 0x3402}); + reset_cpu(); + set_reg(1, 0x100u); + set_reg(3, 0x7777u); + set_reg(4, 0x8888u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + auto v1 = bus_.read(0x108, Width::Word); + auto v2 = bus_.read(0x10C, Width::Word); + ASSERT_TRUE(v1.has_value()); + ASSERT_TRUE(v2.has_value()); + EXPECT_EQ(*v1, 0x7777u); + EXPECT_EQ(*v2, 0x8888u); + EXPECT_EQ(reg(1), 0x100u); +} + +TEST_F(CortexM3Test, StrdPostIndexWriteback) { + // e8e1 3402 = strd r3, r4, [r1], #8: store at base, writeback base+8. + load_program({0xE8E1, 0x3402}); + reset_cpu(); + set_reg(1, 0x100u); + set_reg(3, 0x9999u); + set_reg(4, 0xAAAAu); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + auto v1 = bus_.read(0x100, Width::Word); + auto v2 = bus_.read(0x104, Width::Word); + ASSERT_TRUE(v1.has_value()); + ASSERT_TRUE(v2.has_value()); + EXPECT_EQ(*v1, 0x9999u); + EXPECT_EQ(*v2, 0xAAAAu); + EXPECT_EQ(reg(1), 0x108u); +} + +// ── Load/Store single (.W) imm8 modes: post-/pre-index (matrix §5 / F32-8 gap) ── +// hw2[11:8] op selects the mode (B=post+, 9=post-, F=pre+, D=pre-). + +TEST_F(CortexM3Test, LdrWidePostIndexPositive) { + // f851 3b04 = ldr.w r3, [r1], #4 (op=B): load [base], writeback base+4. 
+ ASSERT_TRUE(mem_.write(0x100, 0xDEADBEEFu, Width::Word).has_value()); + load_program({0xF851, 0x3B04}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0xDEADBEEFu); + EXPECT_EQ(reg(1), 0x104u); +} + +TEST_F(CortexM3Test, LdrWidePostIndexNegative) { + // f851 3904 = ldr.w r3, [r1], #-4 (op=9): load [base], writeback base-4. + ASSERT_TRUE(mem_.write(0x100, 0xCAFEBABEu, Width::Word).has_value()); + load_program({0xF851, 0x3904}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0xCAFEBABEu); + EXPECT_EQ(reg(1), 0x0FCu); +} + +TEST_F(CortexM3Test, StrWidePostIndexWriteback) { + // f841 3b04 = str.w r3, [r1], #4 (op=B): store at base, writeback base+4. + load_program({0xF841, 0x3B04}); + reset_cpu(); + set_reg(1, 0x100u); + set_reg(3, 0x12345678u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + auto v = bus_.read(0x100, Width::Word); + ASSERT_TRUE(v.has_value()); + EXPECT_EQ(*v, 0x12345678u); + EXPECT_EQ(reg(1), 0x104u); +} + +TEST_F(CortexM3Test, LdrWidePreIndexWriteback) { + // f851 3f04 = ldr.w r3, [r1, #4]! (op=F): load [base+4], writeback base+4. + ASSERT_TRUE(mem_.write(0x104, 0x0BADF00Du, Width::Word).has_value()); + load_program({0xF851, 0x3F04}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0x0BADF00Du); + EXPECT_EQ(reg(1), 0x104u); +} + +TEST_F(CortexM3Test, LdrWidePreIndexNegative) { + // f851 3d04 = ldr.w r3, [r1, #-4]! (op=D): load [base-4], writeback base-4. + ASSERT_TRUE(mem_.write(0x0FC, 0xFEEDFACEu, Width::Word).has_value()); + load_program({0xF851, 0x3D04}); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(3), 0xFEEDFACEu); + EXPECT_EQ(reg(1), 0x0FCu); +}