diff --git a/CMakeLists.txt b/CMakeLists.txt index 08f1729..dfcb81c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,8 @@ set(MICRO_FORGE_SOURCES src/arch/arm/cortex_m3/cortex_m3.cpp src/arch/arm/cortex_m3/cortex_m3_thumb16.cpp src/arch/arm/cortex_m3/cortex_m3_thumb32.cpp + src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp + src/arch/arm/cortex_m3/cortex_m3_thumb32_loadstore.cpp src/arch/arm/cortex_m3/cortex_m3_interrupt.cpp src/arch/arm/cortex_m3/cortex_m3_reset.cpp src/arch/toy/cpu.cpp diff --git a/document/notes/008-armcc-ac6-firmware-corpus.md b/document/notes/008-armcc-ac6-firmware-corpus.md new file mode 100644 index 0000000..bc409fc --- /dev/null +++ b/document/notes/008-armcc-ac6-firmware-corpus.md @@ -0,0 +1,39 @@ +# 008 — armcc/AC6 固件语料 E2E (T5c) + +> 2026-06-21。把真实固件语料从「1 份 Keil F103.axf」(见 [007](007-cortex-m3-f103-keil-firmware.md))扩到「3 份 CubeF1 Nucleo 示例」,作为 ctest E2E 回归门禁,补 armcc codegen 多样性。 + +## 背景 + +007 证明 Keil/MDK 固件能跑、且一次就暴露 9 处 Cortex-M3 缺口。但只有 1 份 armcc 样本(用户自写 CubeMX 工程)。要验证「任意真实固件」,需要一批 armcc 编译的标准示例。 + +## 目标 + +- 用 Keil MDK headless 批量编 CubeF1 **STM32F103RB-Nucleo** 小示例(GPIO/TIM/UART)。 +- 产出的 `.axf` 作 ctest E2E fixture,**一次编译、提交进仓**,之后改动指令自动回归(CI 无 Keil)。 + +## 关键决策 + +1. **走 Keil headless(`UV4 -b`),不重生成工程**。CubeF1 自带 `.uvprojx`,直接编。 +2. **复制最小子树到本地 NTFS(`D:\mf\`),不在 WSL 文件系统编**(见陷阱 3)。 +3. **二进制 fixture 提交**,配方进 `test/firmware/armcc/REGENERATE.md`;vendored 子模块补丁不入库。 + +## 陷阱(本次核心价值,全是不靠运气撞出来的) + +1. **AC5→AC6 编译器墙**:CubeF1 示例工程是 **AC5(armcc)** 配置,新版 MDK 只剩 **AC6(armclang)**(AC5 已退役)。`UV4 -b` 报 `uses ARM-Compiler 'Default Compiler Version 5' which is not available`,直接 abort。 +2. **`` 的正确位置是 `` 直接子级**(紧跟 ``),**不是** `` 内。手插错位置会被静默忽略——这步靠 Keil GUI 切一次 AC6 才学到正确写法。 +3. **WSL 文件系统(9p)建不了 Keil 的 `.__i` 响应文件**。经 `\\wsl.localhost` 或映射盘符(`net use Z:`)都不行(同一 9p 后端)。**必须编在本地 NTFS**——把 Drivers + 选定 Examples 复制到 `D:\mf\STM32CubeF1\`,保留 `..\..\..\..\..\Drivers` 相对深度。 +4. 
**`--C99` 是 AC5 flag,AC6 不认**(`armclang: error: unknown argument: '--C99'`)。藏在 `` 里,删掉即可(C99 由 AC6 默认给)。 +5. **`` 陷阱**:GUI 切 AC6 时 Keil 会写入一个语言标准字段,实测强制成 C90 → `cmsis_armclang.h` 的 `inline` 报 `unknown type name`。**删 ``/``/``**,走 AC6 默认 gnu11。 +6. **WSL→Windows interop 偶发挂**(`exec format error`,UV4/cmd.exe/net.exe 全挂)。挂了就在 Windows 原生跑 `.bat`。`/mnt/d/mf` 从 WSL 仍可读写,故编完拷回不阻塞。 + +## 验证 + +- 3 份 `.axf` 全 `ELF32 / ARM / entry=0x80000ed`,`Stm32f103Soc::load_elf` 直接加载。 +- `FirmwareArmcc.{GpioIoToggle,TimTimeBase,UartPrintf}BootsClean` 三测全 **0 fault**(2,000,000 步)。 +- 全量 ctest **247/247** 绿。 +- **当前模拟器对 armcc codegen 零 fault**——和 F103.axf 一致,未提前暴露新指令缺口(那些等 T1/T2 主动挖)。 + +## 后续 + +- 想扩语料:照 `REGENERATE.md` 加示例 + 在 `test_firmware_armcc.cpp` 加 `BootsClean` 测试。 +- armcc 多样性的真正价值在 **T1/T2 修完指令后**——那时这些固件成了活体验收器。 diff --git a/document/notes/009-thumb2-reg-offset-loadstore.md b/document/notes/009-thumb2-reg-offset-loadstore.md new file mode 100644 index 0000000..c8b010b --- /dev/null +++ b/document/notes/009-thumb2-reg-offset-loadstore.md @@ -0,0 +1,64 @@ +# 009 — Thumb-2 寄存器偏移 load/store 修复(matrix §2 #9) + +> Thumb-2 全覆盖里程碑 · T1b。修 matrix §2 高危静默 bug #9(F32-8):`ldr/str.w [Rn,Rm]` 静默算错地址。 + +## 背景 + +Thumb-2 全编码覆盖矩阵([`document/ai/thumb2-coverage-matrix.md`](../ai/thumb2-coverage-matrix.md))§2 列出 11 处 🔴 高危「静默错误」——指令不算 fault,但结果悄悄算错,固件 0-fault 跑通反而掩盖了它们。T1a 已修 #1(shift Carry 不更新)。本批 T1b 修 **#9**:寄存器偏移 load/store 静默算错地址。 + +## 问题 + +`ldr.w r0,[r1,r2]`、`str.w r0,[r1,r2]`、`strb.w`、`ldrh.w` 等所有 **register-offset** 形式(编码 hw1=0xF8x1、hw2=0002 之类)被 `t32_loadstore_single` 误当成 imm8 offset+,hw2[7:0] 当成 imm8,算出 `r1+2` 而非 `r1+r2`。**不 fault**,模拟器不自知结果错。 + +``` +ldr.w r0,[r1,r2] → 期望 addr = 0x100 + 0x40 = 0x140 + → 实际 addr = 0x100 + 0x02 = 0x102 ← 静默错 +``` + +## 根因(objdump 裁定,非记忆) + +dispatch `(hw1 & 0xFF00) == 0xF800` 覆盖 0xF8xx 全空间。函数内 `(hw1>>7)&1` 区分 imm12(bit7=1)与其余(bit7=0)。问题在 bit7=0 的 else 分支: + +- 旧代码按 `op=hw2[11:8]` switch,`case 0x0` 当「`[Rn, #+imm8]`」。 +- 但 `arm-none-eabi-as` 
实测:正向小立即数(`ldr.w [r1,#4]`)永远折叠进 imm12(T3,F8D1),**不会**用 imm8 offset+ 编码。所以在 bit7=0 空间,`op==0` 的**唯一**含义就是 register-offset。 +- 旧 `case 0x0` 既是死分支(无有效 imm8 offset+ 编码),又把 reg-offset 的 Rm/shift 字段当 imm8 解析 —— 双重错。 + +区分位(objdump 全形式实测裁定): + +| hw1[7] | hw2[11:8]=op | 形式 | +|--------|------|------| +| 1 | — | imm12(T3):`addr = Rn + imm12` | +| 0 | 0x0 | **register-offset**(T2):`addr = Rn + (Rm << shift2)` | +| 0 | C / B / 9 / F / D | imm8 addressing modes(T4):off- / post± / pre± | +| 0 | 其他 | IllegalInstruction | + +reg-offset 字段:`Rt=hw2[15:12]`、`shift=hw2[5:4]`(LSL 0–3)、`Rm=hw2[3:0]`,无 writeback。 + +## 修复 + +[`src/arch/arm/cortex_m3/cortex_m3_thumb32_loadstore.cpp`](../../src/arch/arm/cortex_m3/cortex_m3_thumb32_loadstore.cpp) `t32_loadstore_single`: + +1. **base 对齐下沉**:Rn=15 的 `Align(PC+4,4)` 从「literal 专享特判」下沉为通用 base —— reg-offset 以 PC 为基(`ldr.w r0,[pc,r1]`)也正确受益。 +2. **else 分支 carve out**:`op==0` 单独走 register-offset(`addr = base + (rr(rm) << shift)`);imm8 switch 删除无效的 `case 0x0`。 +3. store-to-PC-relative(imm12 Rn=15 & !load)拒绝,语义不变。 + +## 测试 + +两个新单测([`test/test_cortex_m3_advanced.cpp`](../../test/test_cortex_m3_advanced.cpp)): + +- `LoadStoreWideRegisterOffset`:`str.w [r1,r2]` 断言写到 **0x140**(+ regression guard 读 0x102 应无该值);`ldr.w [r1,r2,lsl#3]` 断言读 **0x300**(shift 是关键,bug 下会读 0x132)。 +- `LoadStoreWideRegisterOffsetByteHalf`:`strb.w [r1,r2]`(byte→0x110)、`ldrh.w [r1,r3]`(half←0x140)。 + +**关键设计**:断言**具体地址**(`bus_.read(addr, Width)`),不用 roundtrip —— roundtrip 的 str/ldr 用同一(错误)地址会互相抵消、掩盖 bug。这正是 #9 长期没被现有测试抓到的原因(`LoadStoreWideWordAndHalfwordImmediateOffsets` 就是 roundtrip)。 + +## 验证 + +- 全部编码经 `arm-none-eabi-as -mcpu=cortex-m3` + `objdump -d` 核验。 +- `ctest --test-dir build` 全量 **254 绿**(原 252 + 新增 2),无回归。 +- `cmake --build build -j$(nproc)` 全量编译绿。 + +## 陷阱 / 未竟 + +- **LDRSB.W / LDRSH.W(0xF9xx)仍未 dispatch**(matrix §3 缺失):`(hw1&0xFF00)==0xF800` 不匹配 0xF9xx,fallthrough → IllegalInstruction。这是整个 0xF9xx 空间缺失,范围更大,**另起一批**,本批不动。 +- matrix 引用的代码行号是 T0 
拆分前的(`cortex_m3_thumb32.cpp:366-474`),拆分后实际落在 `cortex_m3_thumb32_loadstore.cpp`;不影响 #9 的判断与修复,行号全量更新属另一清理任务。 +- §2 剩余高危:#2 ORN.W 丢 Rn、#3 RSB 标志错、#4 LSR#0/ASR#0、#10 TBH→LDRD、#11 LDREX/STREX→STRD/LDRD —— 下一批候选。 diff --git a/document/notes/010-thumb2-silent-bug-sweep.md b/document/notes/010-thumb2-silent-bug-sweep.md new file mode 100644 index 0000000..b18541d --- /dev/null +++ b/document/notes/010-thumb2-silent-bug-sweep.md @@ -0,0 +1,44 @@ +# 010 — Thumb-2 §2 高危静默错误清零(T1c) + +> Thumb-2 全覆盖里程碑 · T1c。继 T1a(shift Carry)、T1b(reg-offset load/store,notes 009)之后,清零 matrix §2 剩余 9 处静默错误(#2–#8、#10、#11)。**§2 高危 11/11 全部修复**。 + +## 背景 + +matrix [`document/ai/thumb2-coverage-matrix.md`](../ai/thumb2-coverage-matrix.md) §2 列出 11 处 🔴「静默错误」——指令不 fault,但结果悄悄算错/写错。本批一口气修完剩余 9 处,每条配 objdump 核验 + 针对性单测。 + +## 修复(逐条 + 修复点) + +| # | bug | 修复 | +|---|-----|------| +| 2 | ORN.W reg 丢 Rn | `t32_dataproc_reg` op=3:`~shifted` → `Rn \| ~shifted`(Rn=15 退化为 MVN)。 | +| 3 | RSB.W 标志颠倒 | RSB(op=14)被减数是 shift 操作数;reg 与 imm 两处 `update_flags` 改传 `(Sub, shift-op, Rn)`。 | +| 4 | LSR/ASR shift-by-32 | `dataproc_reg` imm3:imm2==0:LSR→0、ASR→符号扩展(原返回 rm_val 不变)。 | +| 5 | CPSID/IE f 拨错寄存器 | `0xFFF0` 掩码忽略 bit[0] → `0xFFE0` 掩码 + bit4(E/D)/bit1(i)/bit0(f) 分发,FAULTMASK 正确。 | +| 6 | BKPT 静默 NOP | `0xBExx` 落 hints 的 NOP 兜底 → `trigger_hardfault()`(vector 3)。 | +| 7 | MUL.W Ra=15 叠 raw PC | MLA/MLS 块:`rr(15)` 加进乘积 → Ra=15 视作「无累加」(acc=0)。 | +| 8 | ADR.W off-by-2 | `t32_addsub_plain_imm`:ADDW/SUBW Rn=PC 用 `rr(15)`(raw PC)→ `Align(PC+4,4)`。 | +| 10 | TBH 掩码漏 H 位 | TBB/TBH dispatch hw2 掩码 `0xF0F0` 检查了 bit4(H)→ 改 `0xFFE0`(放开 [4:0]=H+Rm)。 | +| 11 | LDREX/STREX 撞 STRD/LDRD | 新 `t32_ldrex_strex`,mask `0xFF60==0xE840`(exclusive 空间 P=0&W=0;STRD/LDRD 必有 P 或 W,故不撞);单核 sim 简化为普通 LD/ST(STREX 总成功 Rd=0)。 | + +## 关键陷阱:#10 mask 首版写错,E2E 抓到 + +#10 第一版把 hw2 掩码写成了 `0xFF0F`(检查 hw2[3:0])。但 **TBB 的 Rm 字段就在 hw2[3:0]**。gcc hal_uart 固件的 `tbb [pc,r4]`(hw2=`0xF004`,Rm=4≠0)因此**不匹配** TBB/TBH dispatch → fall through 到新加的 #11 LDREX 
dispatch(`0xE8DF & 0xFF60 == 0xE840`)→ tbb 被当 LDREX(load,Rd=hw2[15:12]=15)→ `wr(15, 读到的字节)` → **PC 跳飞到 GPIOA(0x40010804)**。 + +`E2E.HalUartTransmit` 以 `InstructionFetchFault` 抓到(PC 跑到外设区)。反汇编 hal_uart 定位到 `tbb [pc,r4]`,才锁定是 #10 mask 而非 #11。正确 mask 是 `0xFFE0`(检查 [15:5],放开 [4:0]=H+Rm)。 + +**教训**: +- mask 改动必须 objdump 验证所有变体,尤其 Rm≠0 的 TBB(之前只验证了 `tbh [pc,r0]` 这种 Rm=0 的)。 +- 新加的「前置 dispatch mask」(LDREX)要确认不吞掉共享 hw1 空间的其它指令——TBB/TBH/LDREXB/H 同居 `0xE8Dx`。**dispatch 顺序(TBB/TBH → LDREX → STRD/LDRD)是 load-bearing**。 +- 间接测试(roundtrip / 0-fault 固件)掩盖静默错;**针对性单测(断言具体值/地址/标志)才是安全网**。这次正是 hal_uart E2E 救了场。 + +## 验证 + +- `ctest` 全量 **263/263 绿**(254 + 9 新单测,每条 bug 一个针对性断言:`test_cortex_m3_advanced.cpp`)。 +- 全部编码 `arm-none-eabi-as -mcpu=cortex-m3` + `objdump -d` 核验。 +- 固件 E2E(3 份 AC6 + gcc hal_uart)全绿,证明修复不破坏真实固件启动(且 hal_uart 的 tbb 反向验证了 #10)。 + +## 未竟(下一里程碑 T2) + +- §3 缺失指令(M3 范围内):**LDRSB.W/LDRSH.W(0xF9xx 整族)**、ORN.W/MVN.W imm、ROR(shifted-reg)+RRX、SMLAL/UMLAL(长乘累加)、CLZ/RBIT/REV.W 族、SSAT/USAT(+Q 标志)、CLREX/NOP.W hint 族、MCR/MRC 策略。 +- §4 作用域门禁(ARMv7E-M DSP 指令 clean-fault 验证)、§5 测试缺口 sweep。 +- 行 255 SBFX/UBFX dispatch 的 `|| (hw1&0xFB70)==0xF3C0` 是 tautological 死代码(pre-existing,clangd 报但 gcc 不报,功能靠 `is_unsigned` 正确)——可顺手清,非阻塞。 diff --git a/document/notes/011-thumb2-missing-instructions.md b/document/notes/011-thumb2-missing-instructions.md new file mode 100644 index 0000000..0f276be --- /dev/null +++ b/document/notes/011-thumb2-missing-instructions.md @@ -0,0 +1,42 @@ +# 011 — Thumb-2 §3 缺失指令补全(T2) + +> Thumb-2 全覆盖里程碑 · T2。补全 matrix §3「M3 范围内缺失指令」。继 T1(§2 静默错误 11/11 清零)之后,把模拟器从「能跑现有固件」推向「覆盖 ARMv7-M base 指令集」。全部 objdump 核验 + 单测,**ctest 271/271 绿**。 + +## 新增指令 + +| 指令 | 实现 | +|------|------| +| ORN.W / MVN.W imm | `dataproc_imm` 加 case 3:`Rn \| ~imm32`(Rn=15 退化为 MVN)。逻辑标志走 update_nz。 | +| ROR / RRX(shifted-reg operand) | `dataproc_reg` shift 加 case 3:`shift_n==0` → RRX(`(C<<31)\|(Rm>>1)`,读 PSR_C);否则 ROR by n。 | +| SMLAL / UMLAL | 扩长乘块(0xFBC0/0xFBE0):`RdHi:RdLo += 
signed/unsigned(Rn*Rm)`,read-before-write 累加。 | +| LDRSB.W / LDRSH.W | dispatch mask `0xFF00→0xFE00`(含 0xF9xx);handler `hw1[8]` 判 sign,load 后 byte/half sign-extend。sign store → IllegalInstruction。 | +| CLZ / RBIT / REV.W / REV16.W / REVSH.W | 新 `t32_misc_reverse`:CLZ=`std::countl_zero`,RBIT=位反转,REV/REV16/REVSH 复用 16 位族逻辑。CLZ vs REV.W 用 `hw1[7:4]`(0xB vs 0x9)区分。 | +| SSAT / USAT | 新 `t32_ssat_usat`:饱和到有/无符号范围,越界写 **APSR.Q**。sat 宽度 `hw2[4:0]`(SSAT +1),shift imm5=`(hw2[14:12]<<2)\|hw2[7:6]`。 | +| CLREX / NOP.W / YIELD.W / SEV.W | barrier handler 加 op=2(CLREX);新 `hw1==0xF3AF` handler(hints 全 no-op)。 | +| MCR / MRC | 无 handler → fall through 末尾 IllegalInstruction(架构上应 NoCoproc UsageFault;CPUError 无 NoCoproc,IllegalInstruction 为合理 clean fault)。M3 无协处理器,两份固件 0 命中。 | + +## 关键设计点 + +- **PSR_Q(bit27)**:新增(def.h),APSR 的 MRS/MSR(sysm 0x00)读写含 Q,SSAT/USAT 饱和时置位。 +- **dispatch 顺序 load-bearing**(延续 T1c #10 教训): + - SSAT/USAT(`0xF3xx`)必须**早于** dataproc-imm —— 否则 SSAT `0xF301` 命中 `(hw1&0xF800)==0xF000` 被当 ADD-imm。mask `0xFFD0` 放开 hw1[5](shift type)。 + - CLZ/RBIT/REV(`0xFA00` with op2≠0)在 shift_reg 之后 —— shift_reg 只收 op2==0。 + - LDRSB/SH 的 `0xFE00` mask 含 0xF8xx(无符号)+ 0xF9xx(符号)。 + - MCR/MRC(`0xEExx`)不匹配任何前置 dispatch,末尾兜底。 +- **字段位**(objdump 权威):`mov.w r3,r1,rrx = ea4f 0331` —— **Rd 在 hw2[11:8]、imm3 在 hw2[14:12] 是独立字段**(不是 hw2[15:12])。`usat r2,#5,r1 = f381 0205`(Rd=hw2[11:8])。这类掩码位错配是 T1c #10 的根因,本次全部 objdump 确认到位,一次通过。 + +## 验证 + +- `ctest` 全量 **271/271 绿**(263 + 8 新单测,`test_cortex_m3_advanced.cpp`)。 +- 全部编码 `arm-none-eabi-as -mcpu=cortex-m3` + `objdump -d` 核验。 +- 固件 E2E(3 AC6 + gcc hal_uart)全绿,不破坏真实固件启动。 + +## 未做(策略性) + +- **BLX imm(T1)**:现 IllegalInstruction,M3 无 ARM 态,正确 fault(matrix §3:「现即可,补语义说明」)。 +- **ARMv7E-M DSP**(QADD/QSUB/QDADD/QDSUB、PKHBT/PKHTB、SEL、SXTAH/UXTAH/SXTB16、UMAAL、SMLAD 族、USAD8 族):`arm-none-eabi-as -mcpu=cortex-m3` 拒绝 = M3 没有,保持 fault(matrix §4,作用域外)。需验证它们 clean-fault(不误解码进现有 handler)——这是 T3(§4 门禁)。 +- **T3/§4**:作用域外指令的 clean-fault 验证;**T4/§5**:测试缺口 
sweep(post-index、LDRD 全模式、flag sweep)。 + +## 成果 + +matrix §3 缺失指令基本补全(M3 范围内),模拟器指令覆盖从「够跑现有固件」提升到「ARMv7-M base 指令集覆盖」。剩余 §4(作用域门禁)+ §5(测试缺口)为收尾验证类工作。 diff --git a/include/arch/arm/cortex_m3/cortex_m3.hpp b/include/arch/arm/cortex_m3/cortex_m3.hpp index 9cbf082..77e60c1 100644 --- a/include/arch/arm/cortex_m3/cortex_m3.hpp +++ b/include/arch/arm/cortex_m3/cortex_m3.hpp @@ -59,6 +59,26 @@ class CortexM3CPU : public CPU { Expected fetch16(addr_t addr); CPUExpected execute_16bit(uint16_t insn); CPUExpected execute_32bit(uint16_t hw1, uint16_t hw2); + // 32-bit Thumb-2 family handlers — split out of execute_32bit so no single + // translation unit exceeds the DIRECTIVES 700-line cap. Each returns the + // result of its (already mask-matched) block; execute_32bit dispatches. + CPUExpected t32_addsub_plain_imm(uint16_t hw1, uint16_t hw2); + CPUExpected t32_dataproc_imm(uint16_t hw1, uint16_t hw2); + CPUExpected t32_dataproc_reg(uint16_t hw1, uint16_t hw2); + CPUExpected t32_misc_reverse(uint16_t hw1, uint16_t hw2); + CPUExpected t32_ssat_usat(uint16_t hw1, uint16_t hw2); + CPUExpected t32_shift_reg(uint16_t hw1, uint16_t hw2); + CPUExpected t32_loadstore_single(uint16_t hw1, uint16_t hw2); + CPUExpected t32_ldrex_strex(uint16_t hw1, uint16_t hw2); + CPUExpected t32_tbb_tbh(uint16_t hw1, uint16_t hw2); + CPUExpected t32_strd_ldrd(uint16_t hw1, uint16_t hw2); + CPUExpected t32_stm_ldm(uint16_t hw1, uint16_t hw2); + // Operand helpers shared across the 32-bit handlers (promoted from the + // execute_32bit-local lambdas so the split-out handlers can use them). 
+ data_t rr(uint8_t idx); + CPUExpected wr(uint8_t idx, data_t val); + CPUExpected br(addr_t addr, Width w); + CPUExpected bw(addr_t addr, data_t val, Width w); CPU::CPUExpected read_pc_raw() const; CPU::CPUExpected write_reg(uint8_t index, data_t value); diff --git a/include/arch/arm/cortex_m3/def.h b/include/arch/arm/cortex_m3/def.h index a66f49d..2a72a64 100644 --- a/include/arch/arm/cortex_m3/def.h +++ b/include/arch/arm/cortex_m3/def.h @@ -10,6 +10,7 @@ static constexpr data_t PSR_N = 1u << 31; static constexpr data_t PSR_Z = 1u << 30; static constexpr data_t PSR_C = 1u << 29; static constexpr data_t PSR_V = 1u << 28; +static constexpr data_t PSR_Q = 1u << 27; static constexpr data_t PSR_T = 1u << 24; static constexpr uint16_t REGCNT = 16; diff --git a/include/arch/arm/cortex_m3/thumb_fields.hpp b/include/arch/arm/cortex_m3/thumb_fields.hpp index 2254aed..dea7366 100644 --- a/include/arch/arm/cortex_m3/thumb_fields.hpp +++ b/include/arch/arm/cortex_m3/thumb_fields.hpp @@ -93,5 +93,60 @@ constexpr uint8_t decode_key(uint16_t insn) { return (insn >> 11) & 0x1Fu; } +/// ARMv7-M barrel shift, returning both the shifted value and the shifter +/// carry-out (the C flag source for shift instructions). Pure model of the +/// ARM shift operation — no CPU state. +/// +/// type: 0=LSL, 1=LSR, 2=ASR, 3=ROR. +/// `amount` is the RESOLVED shift amount: for immediate shifts the caller +/// converts the encoded field (LSR/ASR/ROR encoded 0 → 32; LSL encoded 0 → 0). +/// `carry_in` is the current C flag, used when amount==0 (LSL/LSR/ASR leave C +/// unchanged; ROR #0 is RRX). 
+struct ShiftOut { + uint32_t value; + bool carry; +}; + +inline ShiftOut barrel_shift(uint8_t type, uint32_t value, uint8_t amount, + bool carry_in) { + if (amount == 0) { + if (type == 3) { // ROR #0 → RRX: (C:value) >> 1, C = value[0] + return {(static_cast(carry_in) << 31) | (value >> 1), + (value & 1u) != 0}; + } + return {value, carry_in}; // LSL/LSR/ASR #0 → unchanged, C unchanged + } + switch (type) { + case 0: { // LSL + if (amount >= 32) { + return {0u, amount == 32 ? ((value & 1u) != 0) : false}; + } + return {value << amount, ((value >> (32 - amount)) & 1u) != 0}; + } + case 1: { // LSR + if (amount >= 32) { + return {0u, (value & 0x80000000u) != 0}; + } + return {value >> amount, ((value >> (amount - 1)) & 1u) != 0}; + } + case 2: { // ASR + if (amount >= 32) { + bool sign = (value & 0x80000000u) != 0; + return {sign ? 0xFFFFFFFFu : 0u, sign}; + } + return {static_cast(static_cast(value) >> amount), + ((value >> (amount - 1)) & 1u) != 0}; + } + default: { // ROR (amount > 0) + uint8_t r = amount & 31u; + if (r == 0) { + return {value, (value & 0x80000000u) != 0}; + } + return {(value >> r) | (value << (32 - r)), + ((value >> (r - 1)) & 1u) != 0}; + } + } +} + } // namespace arm::cortex_m3::thumb } // namespace micro_forge::cpu diff --git a/src/arch/arm/cortex_m3/cortex_m3_thumb16.cpp b/src/arch/arm/cortex_m3/cortex_m3_thumb16.cpp index eaf393a..7f902ca 100644 --- a/src/arch/arm/cortex_m3/cortex_m3_thumb16.cpp +++ b/src/arch/arm/cortex_m3/cortex_m3_thumb16.cpp @@ -49,13 +49,26 @@ CPU::CPUExpected CortexM3CPU::execute_16bit(uint16_t insn) { return {}; }; - // ── CPSIE i / CPSID i ── - if ((insn & 0xFFF0u) == 0xB660u) { - primask_ &= ~1u; - return {}; - } - if ((insn & 0xFFF0u) == 0xB670u) { - primask_ |= 1u; + // ── CPS effect {i,f}: CPSIE (enable) / CPSID (disable) ── + // 0xB66x (CPSIE) / 0xB67x (CPSID); bit4 = 0/1 (enable/disable), + // bit1 = i (PRIMASK), bit0 = f (FAULTMASK). 
The old 0xFFF0 mask ignored + // bit[1:0], so cpsie/cpsid f silently acted on PRIMASK, not FAULTMASK. + if ((insn & 0xFFE0u) == 0xB660u) { + bool disable = (insn >> 4) & 1u; + if (insn & 0x2u) { // i → PRIMASK + if (disable) { + primask_ |= 1u; + } else { + primask_ &= ~1u; + } + } + if (insn & 0x1u) { // f → FAULTMASK + if (disable) { + faultmask_ |= 1u; + } else { + faultmask_ &= ~1u; + } + } return {}; } @@ -129,28 +142,27 @@ CPU::CPUExpected CortexM3CPU::execute_16bit(uint16_t insn) { case 0b00000: case 0b00001: case 0b00010: { - uint8_t op = (insn >> 11) & 0x3; + uint8_t op = (insn >> 11) & 0x3; // 0=LSL, 1=LSR, 2=ASR uint8_t imm = imm5(insn); uint8_t rm = rn3(insn); uint8_t rd = rd3(insn); data_t val = rr(rm); - data_t result; - - if (op == 0b00) { // LSL - result = imm == 0 ? val : val << imm; - } else if (op == 0b01) { // LSR - result = imm == 0 ? 0 : val >> imm; - } else { // ASR - result = - (imm == 0) - ? ((val & 0x80000000u) ? 0xFFFFFFFFu : 0) - : static_cast(static_cast(val) >> imm); - } + // LSR/ASR encoded shift of 0 means shift-by-32; LSL 0 = no shift. + uint8_t amount = (op == 0b00) ? imm : (imm == 0 ? 32 : imm); + auto [result, carry] = + barrel_shift(op, val, amount, (xpsr_ & PSR_C) != 0); auto res = wr(rd, result); if (!res) { return res; } update_nz(result); + // Shift instructions update C from the shifter carry-out (LSL #0 + // returns carry_in, so C is unchanged in that case). + if (carry) { + xpsr_ |= PSR_C; + } else { + xpsr_ &= ~PSR_C; + } break; } @@ -256,6 +268,9 @@ CPU::CPUExpected CortexM3CPU::execute_16bit(uint16_t insn) { uint8_t rd = rd3(insn); data_t a = rr(rd), b = rr(rm); data_t result; + // Set only by the shift-by-register ops (LSL/LSR/ASR/ROR): the + // shifter carry-out drives C. nullopt → C unchanged. 
+ std::optional shift_carry; switch (op) { case 0x0: @@ -264,25 +279,34 @@ CPU::CPUExpected CortexM3CPU::execute_16bit(uint16_t insn) { case 0x1: result = a ^ b; break; - case 0x2: - result = a << (b & 0xFF); + case 0x2: { // LSL register + auto s = barrel_shift(0, a, b & 0xFF, (xpsr_ & PSR_C) != 0); + result = s.value; + shift_carry = s.carry; break; - case 0x3: - result = a >> (b & 0xFF); + } + case 0x3: { // LSR register + auto s = barrel_shift(1, a, b & 0xFF, (xpsr_ & PSR_C) != 0); + result = s.value; + shift_carry = s.carry; break; - case 0x4: - result = static_cast(static_cast(a) >> - (b & 0xFF)); + } + case 0x4: { // ASR register + auto s = barrel_shift(2, a, b & 0xFF, (xpsr_ & PSR_C) != 0); + result = s.value; + shift_carry = s.carry; break; + } case 0x5: result = a + b + ((xpsr_ & PSR_C) ? 1 : 0); break; case 0x6: result = a - b - ((xpsr_ & PSR_C) ? 0 : 1); break; - case 0x7: { - uint8_t n = (b & 0xFF) & 0x1F; - result = n ? ((a >> n) | (a << (32 - n))) : a; + case 0x7: { // ROR register + auto s = barrel_shift(3, a, b & 0xFF, (xpsr_ & PSR_C) != 0); + result = s.value; + shift_carry = s.carry; break; } case 0x8: @@ -317,6 +341,13 @@ CPU::CPUExpected CortexM3CPU::execute_16bit(uint16_t insn) { return res; } update_nz(result); + if (shift_carry) { + if (*shift_carry) { + xpsr_ |= PSR_C; + } else { + xpsr_ &= ~PSR_C; + } + } break; } @@ -532,6 +563,11 @@ CPU::CPUExpected CortexM3CPU::execute_16bit(uint16_t insn) { case 0b10111: { uint8_t sub_op = (insn >> 9) & 0x3; if (sub_op == 0b11) { + // BKPT #imm8 (0xBExx): no debugger attached → HardFault. + // (Was silently treated as NOP — coverage matrix §2 #6.) 
+ if ((insn & 0xFF00u) == 0xBE00u) { + return trigger_hardfault(); + } if ((insn & 0xFF00u) == 0xBF00u && (insn & 0xFu) != 0) { uint8_t first_cond = (insn >> 4) & 0xFu; uint8_t mask = insn & 0xFu; diff --git a/src/arch/arm/cortex_m3/cortex_m3_thumb32.cpp b/src/arch/arm/cortex_m3/cortex_m3_thumb32.cpp index 720e64e..3a820f5 100644 --- a/src/arch/arm/cortex_m3/cortex_m3_thumb32.cpp +++ b/src/arch/arm/cortex_m3/cortex_m3_thumb32.cpp @@ -8,43 +8,113 @@ namespace micro_forge::cpu::arm::cortex_m3 { using namespace thumb; +// ── Operand helpers shared across the 32-bit Thumb-2 handlers ── +// Promoted from execute_32bit-local lambdas so the family handlers split into +// cortex_m3_thumb32_{loadstore,dataproc}.cpp can use them. Bodies unchanged. +data_t CortexM3CPU::rr(uint8_t idx) { + return regs_.read(idx).value_or(0); +} +CPU::CPUExpected CortexM3CPU::wr(uint8_t idx, data_t val) { + auto res = write_reg(idx, val); + if (!res) { + return std::unexpected{res.error()}; + } + return {}; +} +CPU::CPUExpected CortexM3CPU::br(addr_t addr, Width w) { + if (!bus_) { + record_bus_fault(BusError::InvalidDevice, addr, w); + return std::unexpected{CPUError::DataAccessFault}; + } + auto v = bus_->read(addr, w); + if (!v) { + record_bus_fault(v.error(), addr, w); + return std::unexpected{CPUError::DataAccessFault}; + } + return *v; +} +CPU::CPUExpected CortexM3CPU::bw(addr_t addr, data_t val, Width w) { + if (!bus_) { + record_bus_fault(BusError::InvalidDevice, addr, w); + return std::unexpected{CPUError::DataAccessFault}; + } + auto v = bus_->write(addr, val, w); + if (!v) { + record_bus_fault(v.error(), addr, w); + return std::unexpected{CPUError::DataAccessFault}; + } + return {}; +} + // ── 32-bit Thumb-2 decode ── +// +// This is the dispatcher: each mask is checked in the same order as before the +// split; the large data-processing and load/store families delegate to the +// t32_* handlers in cortex_m3_thumb32_{dataproc,loadstore}.cpp. 
Branches, +// MOVW/MOVT, MSR/MRS, bitfield, multiply/divide stay inline. CPU::CPUExpected CortexM3CPU::execute_32bit(uint16_t hw1, uint16_t hw2) { - auto rr = [&](uint8_t idx) -> data_t { - return regs_.read(idx).value_or(0); - }; - auto wr = [&](uint8_t idx, data_t val) -> CPUExpected { - auto res = write_reg(idx, val); - if (!res) { - return std::unexpected{res.error()}; - } - return {}; - }; - auto br = [&](addr_t addr, Width w) -> CPUExpected { - if (!bus_) { - record_bus_fault(BusError::InvalidDevice, addr, w); - return std::unexpected{CPUError::DataAccessFault}; - } - auto v = bus_->read(addr, w); - if (!v) { - record_bus_fault(v.error(), addr, w); - return std::unexpected{CPUError::DataAccessFault}; + auto read_special = [&]() -> data_t { + uint8_t sysm = hw2 & 0xFFu; + switch (sysm) { + case 0x00: + return xpsr_ & (PSR_N | PSR_Z | PSR_C | PSR_V | PSR_Q); + case 0x08: + return msp_; + case 0x09: + return psp_; + case 0x10: + return primask_; + case 0x11: + return basepri_; + case 0x13: + return faultmask_; + case 0x14: + return control_; + default: + return 0; } - return *v; }; - auto bw = [&](addr_t addr, data_t val, Width w) -> CPUExpected { - if (!bus_) { - record_bus_fault(BusError::InvalidDevice, addr, w); - return std::unexpected{CPUError::DataAccessFault}; - } - auto v = bus_->write(addr, val, w); - if (!v) { - record_bus_fault(v.error(), addr, w); - return std::unexpected{CPUError::DataAccessFault}; + + auto write_special = [&](data_t value) -> CPUExpected { + uint8_t sysm = hw2 & 0xFFu; + switch (sysm) { + case 0x00: + xpsr_ = (xpsr_ & ~(PSR_N | PSR_Z | PSR_C | PSR_V | PSR_Q)) | + (value & (PSR_N | PSR_Z | PSR_C | PSR_V | PSR_Q)) | + PSR_T; + return {}; + case 0x08: + msp_ = value & ~0x3u; + if (in_handler_mode_ || !(control_ & 0x2u)) { + return write_reg(13, msp_); + } + return {}; + case 0x09: + psp_ = value & ~0x3u; + if (!in_handler_mode_ && (control_ & 0x2u)) { + return write_reg(13, psp_); + } + return {}; + case 0x10: + primask_ = value & 1u; 
+ return {}; + case 0x11: + basepri_ = value & 0xFFu; + return {}; + case 0x13: + faultmask_ = value & 1u; + return {}; + case 0x14: { + control_ = value & 0x3u; + data_t active_sp = + (!in_handler_mode_ && (control_ & 0x2u)) ? psp_ : msp_; + return write_reg(13, active_sp); + } + default: + return std::unexpected{CPUError::IllegalInstruction}; } - return {}; }; // ── BL / BLX ── @@ -146,76 +216,22 @@ CPU::CPUExpected CortexM3CPU::execute_32bit(uint16_t hw1, uint16_t hw2) { return wr(rd, val); } - // ── DMB / DSB / ISB ── + // ── DMB / DSB / ISB / CLREX ── if (hw1 == 0xF3BF && (hw2 & 0xFF0Fu) == 0x8F0Fu) { uint8_t option = hw2 & 0xFu; uint8_t op = (hw2 >> 4) & 0xFu; - if (option != 0xFu || (op != 0x4u && op != 0x5u && op != 0x6u)) { + // op=4 DSB, 5 DMB, 6 ISB; op=2 CLREX (no-op on a single-core sim). + if (option != 0xFu || + (op != 0x2u && op != 0x4u && op != 0x5u && op != 0x6u)) { return std::unexpected{CPUError::IllegalInstruction}; } return {}; } - auto read_special = [&]() -> data_t { - uint8_t sysm = hw2 & 0xFFu; - switch (sysm) { - case 0x00: - return xpsr_ & (PSR_N | PSR_Z | PSR_C | PSR_V); - case 0x08: - return msp_; - case 0x09: - return psp_; - case 0x10: - return primask_; - case 0x11: - return basepri_; - case 0x13: - return faultmask_; - case 0x14: - return control_; - default: - return 0; - } - }; - - auto write_special = [&](data_t value) -> CPUExpected { - uint8_t sysm = hw2 & 0xFFu; - switch (sysm) { - case 0x00: - xpsr_ = (xpsr_ & ~(PSR_N | PSR_Z | PSR_C | PSR_V)) | - (value & (PSR_N | PSR_Z | PSR_C | PSR_V)) | PSR_T; - return {}; - case 0x08: - msp_ = value & ~0x3u; - if (in_handler_mode_ || !(control_ & 0x2u)) { - return write_reg(13, msp_); - } - return {}; - case 0x09: - psp_ = value & ~0x3u; - if (!in_handler_mode_ && (control_ & 0x2u)) { - return write_reg(13, psp_); - } - return {}; - case 0x10: - primask_ = value & 1u; - return {}; - case 0x11: - basepri_ = value & 0xFFu; - return {}; - case 0x13: - faultmask_ = value & 1u; - return {}; 
- case 0x14: { - control_ = value & 0x3u; - data_t active_sp = - (!in_handler_mode_ && (control_ & 0x2u)) ? psp_ : msp_; - return write_reg(13, active_sp); - } - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - }; + // ── NOP.W / YIELD.W / SEV.W (T4 hints, f3af 80xx) ── no-op on this sim. + if (hw1 == 0xF3AF && (hw2 & 0xF000u) == 0x8000u) { + return {}; + } // ── MRS ── if ((hw1 & 0xFFF0) == 0xF3E0 && (hw2 & 0xF000) == 0x8000) { @@ -262,215 +278,30 @@ CPU::CPUExpected CortexM3CPU::execute_32bit(uint16_t hw1, uint16_t hw2) { return wr(rd, result); } + // ── SSAT / USAT (saturate; writes APSR.Q) ── + // Must precede dataproc-imm, which also matches 0xF3xx (insn[25]=0) and + // would misread SSAT as ADD-imm. mask 0xFFD0 frees hw1[5] (shift type). + if ((hw1 & 0xFFD0u) == 0xF300u || (hw1 & 0xFFD0u) == 0xF380u) { + return t32_ssat_usat(hw1, hw2); + } + // ── Add/subtract (plain imm12): insn[25]=1 (hw1[9]) ── - // addw/subw (S=0) and adds.w/subs.w (S=1). imm12 = i:imm3:imm8 packed - // PLAIN (not Thumb2ExpandImm). op = hw1[8:5]: 0000=ADD, 0101=SUB; S=hw1[4]. - // Modified-immediate (insn[25]=0, e.g. cmp.w #0x110000) keeps its own form. if ((hw1 & 0xF800) == 0xF000 && (hw1 & 0x0200) != 0 && (hw2 & 0x8000) == 0) { - uint8_t op = (hw1 >> 5) & 0xF; - bool s_bit = (hw1 >> 4) & 1; - uint8_t rn = hw1 & 0xF; - uint8_t rd = (hw2 >> 12) & 0xF; - uint32_t imm12 = (((hw1 >> 10) & 0x1u) << 11) | - (((hw2 >> 12) & 0x7u) << 8) | (hw2 & 0xFFu); - uint32_t a = rr(rn); - uint32_t result; - bool is_sub; - switch (op) { - case 0x0: - is_sub = false; - result = a + imm12; - break; // ADD.W/addw - case 0x5: - is_sub = true; - result = a - imm12; - break; // SUB.W/subw - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - if (s_bit) { - update_flags(is_sub ? 
FlagPostOperation::Sub - : FlagPostOperation::Add, - a, imm12, result); - } - return wr(rd, result); + return t32_addsub_plain_imm(hw1, hw2); } // ── Data processing (modified immediate): insn[25]=0 ── if ((hw1 & 0xF800) == 0xF000 && (hw1 & 0x0200) == 0 && (hw2 & 0x8000) == 0) { - uint8_t op2 = (hw1 >> 5) & 0xF; - bool s_bit = (hw1 >> 4) & 1; - uint8_t rn = thumb32::dp_rn(hw1); - uint8_t rd = thumb32::dp_rd(hw2); - uint32_t imm32 = - thumb32::expand_imm12((hw1 >> 10) & 1, (hw2 >> 12) & 7, hw2 & 0xFF); - uint32_t rn_val = rr(rn); - uint32_t result; - - switch (op2) { - case 0: - result = rn_val & imm32; - break; // AND - case 1: - result = rn_val & ~imm32; - break; // BIC - case 2: - result = (rn == 15) ? imm32 : (rn_val | imm32); - break; // ORR/MOV - case 4: - result = rn_val ^ imm32; - break; // EOR - case 8: - result = rn_val + imm32; - break; // ADD - case 10: - result = rn_val + imm32 + ((xpsr_ & PSR_C) ? 1u : 0u); - break; // ADC - case 11: { - uint32_t borrow = (xpsr_ & PSR_C) ? 0u : 1u; - result = rn_val - imm32 - borrow; - break; // SBC - } - case 13: - result = rn_val - imm32; - break; // SUB - case 14: - result = imm32 - rn_val; - break; // RSB - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - - if (s_bit) { - if (op2 == 8 || op2 == 10 || op2 == 13 || op2 == 14 || op2 == 11) { - uint32_t flag_rhs = - op2 == 10 ? imm32 + ((xpsr_ & PSR_C) ? 1u : 0u) - : op2 == 11 ? imm32 + ((xpsr_ & PSR_C) ? 0u : 1u) - : imm32; - update_flags(op2 <= 10 ? FlagPostOperation::Add - : FlagPostOperation::Sub, - rn_val, flag_rhs, result); - } else { - update_nz(result); - } - } - // CMP/CMN/TST/TEQ: S=1, Rd=15 → flags only, no register write - if (s_bit && rd == 15) { - return {}; - } - return wr(rd, result); + return t32_dataproc_imm(hw1, hw2); } // ── Load/Store single (immediate): str/ldr/strb/ldrb/strh/ldrh .W ── - // hw1[7] selects the immediate form: - // 1 → imm12 offset (T2/T3): addr = Rn + imm12, no writeback. 
- // 0 → imm8 with addressing modes, op = hw2[11:8]: - // 0=offset+, C=offset-, B=post+, 9=post-, F=pre+, D=pre-. - if ((hw1 & 0xFF00) == 0xF800) { - uint8_t rn = hw1 & 0xF; - bool load = (hw1 >> 4) & 1; - uint8_t size = (hw1 >> 5) & 0x3; - uint8_t rt = (hw2 >> 12) & 0xF; - uint32_t rn_val = rr(rn); - Width width; - switch (size) { - case 0: - width = Width::Byte; - break; - case 1: - width = Width::HalfWord; - break; - case 2: - width = Width::Word; - break; - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - - // ── LDR.W (literal): Rn == PC ── - // `ldr.w Rt, [pc, #imm12]` — PC-relative literal pool load (compiled - // `LDR Rd, =const`). addr = Align(PC+4, 4) + imm12, no writeback. - // Store-to-PC-relative is UNDEFINED → rejected for !load. - if (rn == 15) { - if (!load) { - return std::unexpected{CPUError::IllegalInstruction}; - } - uint32_t imm12 = hw2 & 0xFFFu; - auto pc_res = read_pc_raw(); - if (!pc_res) { - return std::unexpected{pc_res.error()}; - } - addr_t addr = ((*pc_res + 4) & ~0x3u) + imm12; - auto r = br(addr, width); - if (!r) { - return std::unexpected{r.error()}; - } - return wr(rt, *r); - } - - // Resolve effective address + optional writeback per immediate form. - addr_t addr = 0; - bool writeback = false; - data_t wb_val = 0; - if ((hw1 >> 7) & 1) { - // imm12 offset form (no writeback). - addr = rn_val + (hw2 & 0xFFFu); - } else { - uint8_t op = (hw2 >> 8) & 0xF; - uint32_t imm8 = hw2 & 0xFF; - switch (op) { - case 0x0: // [Rn, #+imm8] - addr = rn_val + imm8; - break; - case 0xC: // [Rn, #-imm8] - addr = rn_val - imm8; - break; - case 0xB: // [Rn], #+imm8 (post-index) - addr = rn_val; - wb_val = rn_val + imm8; - writeback = true; - break; - case 0x9: // [Rn], #-imm8 (post-index) - addr = rn_val; - wb_val = rn_val - imm8; - writeback = true; - break; - case 0xF: // [Rn, #+imm8]! (pre-index) - addr = rn_val + imm8; - wb_val = addr; - writeback = true; - break; - case 0xD: // [Rn, #-imm8]! 
(pre-index) - addr = rn_val - imm8; - wb_val = addr; - writeback = true; - break; - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - } - - if (load) { - auto v = br(addr, width); - if (!v) { - return std::unexpected{v.error()}; - } - auto w = wr(rt, *v); - if (!w) { - return w; - } - } else { - auto w = bw(addr, rr(rt), width); - if (!w) { - return w; - } - } - if (writeback) { - return wr(rn, wb_val); - } - return {}; + // 0xFE00 mask covers both 0xF8xx (unsigned str/ldr/b/h) and 0xF9xx + // (signed LDRSB.W/LDRSH.W); the handler sign-extends the 0xF9xx forms. + if ((hw1 & 0xFE00) == 0xF800) { + return t32_loadstore_single(hw1, hw2); } // ── UDIV / SDIV ── @@ -503,27 +334,35 @@ CPU::CPUExpected CortexM3CPU::execute_32bit(uint16_t hw1, uint16_t hw2) { uint8_t rd = (hw2 >> 8) & 0xFu; uint8_t ra = (hw2 >> 12) & 0xFu; uint32_t product = rr(rn) * rr(rm); - uint32_t result = - (hw2 & 0x0010u) ? (rr(ra) - product) : (product + rr(ra)); + // MUL.W (Ra=15) is MLA/MLS without an accumulator; rr(15) would fold + // the raw PC into the product. Treat Ra=15 as "no accumulate". + uint32_t acc = (ra == 15) ? 0u : rr(ra); + uint32_t result = (hw2 & 0x0010u) ? 
(acc - product) : (product + acc); return wr(rd, result); } - // ── SMULL / UMULL ── - if (((hw1 & 0xFFF0u) == 0xFB80u || (hw1 & 0xFFF0u) == 0xFBA0u) && + // ── SMULL/UMULL (no accumulate) and SMLAL/UMLAL (accumulate) ── + uint16_t mp_hw1 = hw1 & 0xFFF0u; + if ((mp_hw1 == 0xFB80u || mp_hw1 == 0xFBA0u || + mp_hw1 == 0xFBC0u || mp_hw1 == 0xFBE0u) && (hw2 & 0x00F0u) == 0x0000u) { uint8_t rn = hw1 & 0xFu; uint8_t rm = hw2 & 0xFu; uint8_t rdlo = (hw2 >> 12) & 0xFu; uint8_t rdhi = (hw2 >> 8) & 0xFu; - uint64_t result; - if ((hw1 & 0xFFF0u) == 0xFB80u) { - result = static_cast( - static_cast(static_cast(rr(rn))) * - static_cast(static_cast(rr(rm)))); - } else { - result = - static_cast(rr(rn)) * static_cast(rr(rm)); - } + bool accumulate = (mp_hw1 == 0xFBC0u || mp_hw1 == 0xFBE0u); + bool is_signed = (mp_hw1 == 0xFB80u || mp_hw1 == 0xFBC0u); + uint64_t product = + is_signed + ? static_cast( + static_cast(static_cast(rr(rn))) * + static_cast(static_cast(rr(rm)))) + : static_cast(rr(rn)) * static_cast(rr(rm)); + // SMLAL/UMLAL accumulate the existing RdHi:RdLo (read before write). + uint64_t result = accumulate + ? product + (static_cast(rr(rdhi)) << 32) + + rr(rdlo) + : product; auto lo = wr(rdlo, static_cast(result)); if (!lo) { return lo; @@ -533,274 +372,44 @@ CPU::CPUExpected CortexM3CPU::execute_32bit(uint16_t hw1, uint16_t hw2) { // ── Data processing (shifted register): AND, ORR, EOR, ADD, SUB, etc. ── if ((hw1 & 0xFE00) == 0xEA00 && (hw2 & 0x8000) == 0) { - uint8_t op = (hw1 >> 5) & 0xF; - bool s_bit = (hw1 >> 4) & 1; - uint8_t rn = hw1 & 0xF; - uint8_t rd = (hw2 >> 8) & 0xF; - uint8_t rm = hw2 & 0xF; - uint8_t imm3 = (hw2 >> 12) & 0x7; - uint8_t imm2 = (hw2 >> 6) & 0x3; - uint8_t shift_type = (hw2 >> 4) & 0x3; - uint8_t shift_n = (imm3 << 2) | imm2; - - uint32_t rm_val = rr(rm); - - uint32_t shifted; - switch (shift_type) { - case 0: - shifted = shift_n == 0 ? rm_val : rm_val << shift_n; - break; - case 1: - shifted = rm_val >> (shift_n == 0 ? 
0 : shift_n); - break; - case 2: { - if (shift_n == 0) { - shifted = rm_val; - } else { - uint32_t sign = rm_val & 0x80000000u; - shifted = rm_val >> shift_n; - if (sign) { - shifted |= (0xFFFFFFFFu << (32 - shift_n)); - } - } - break; - } - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - - uint32_t rn_val = rr(rn); - uint32_t result; - switch (op) { - case 0: - result = rn_val & shifted; - break; - case 1: - result = rn_val & ~shifted; - break; - case 2: - result = (rn == 15) ? shifted : (rn_val | shifted); - break; - case 3: - result = ~shifted; - break; - case 4: - result = rn_val ^ shifted; - break; - case 8: - result = rn_val + shifted; - break; - case 13: - result = rn_val - shifted; - break; - case 14: - result = shifted - rn_val; - break; - default: - return std::unexpected{CPUError::IllegalInstruction}; - } - - if (s_bit) { - if (op == 8 || op == 13 || op == 14) { - update_flags(op <= 8 ? FlagPostOperation::Add - : FlagPostOperation::Sub, - rn_val, shifted, result); - } else { - update_nz(result); - } - } - // CMP/CMN/TST/TEQ: S=1, Rd=15 → flags only, no register write. - if (s_bit && rd == 15) { - return {}; - } - return wr(rd, result); + return t32_dataproc_reg(hw1, hw2); } // ── Shift register (LSL/LSR/ASR/ROR register) ── if ((hw1 & 0xFF00) == 0xFA00 && (hw2 & 0xF0F0) == 0xF000) { - uint8_t rn = hw1 & 0xF; - bool s_bit = (hw1 >> 4) & 1; - uint8_t shift_type = (hw1 >> 5) & 0x3; - uint8_t rd = (hw2 >> 8) & 0xF; - uint8_t rm = hw2 & 0xF; - - uint32_t value = rr(rn); - uint32_t shift = rr(rm) & 0xFFu; - uint32_t result = value; - - switch (shift_type) { - case 0: // LSL - result = shift == 0 ? value : (shift < 32 ? value << shift : 0); - break; - case 1: // LSR - result = shift == 0 ? value : (shift < 32 ? value >> shift : 0); - break; - case 2: // ASR - if (shift == 0) { - result = value; - } else if (shift >= 32) { - result = (value & 0x80000000u) ? 
0xFFFFFFFFu : 0; - } else { - result = static_cast( - static_cast(value) >> shift); - } - break; - case 3: { // ROR - uint32_t rot = shift & 31u; - result = - rot == 0 ? value : ((value >> rot) | (value << (32 - rot))); - break; - } - } + return t32_shift_reg(hw1, hw2); + } - auto w = wr(rd, result); - if (!w) { - return w; - } - if (s_bit) { - update_nz(result); - } - return {}; + // ── CLZ / RBIT / REV.W / REV16.W / REVSH.W ── + // 0xFA00 with op2 (hw2[7:4]) != 0; shift_reg above already took op2==0. + if ((hw1 & 0xFF00u) == 0xFA00u && (hw2 & 0xF000u) == 0xF000u && + (hw2 & 0x00F0u) != 0u) { + return t32_misc_reverse(hw1, hw2); } // ── TBB / TBH (Table Branch) ── - if ((hw1 & 0xFFF0) == 0xE8D0 && (hw2 & 0xF0F0) == 0xF000) { - uint8_t rn = hw1 & 0xF; - uint8_t rm = hw2 & 0xF; - bool H = (hw2 >> 4) & 1; - - uint32_t pc_val = rr(15) + 4; - uint32_t base = (rn == 15) ? pc_val : rr(rn); - uint32_t index = (rm == 15) ? 0u : rr(rm); - - uint32_t halfwords; - if (H) { - auto v = br(base + index * 2, Width::HalfWord); - if (!v) { - return std::unexpected{v.error()}; - } - halfwords = *v; - } else { - auto v = br(base + index, Width::Byte); - if (!v) { - return std::unexpected{v.error()}; - } - halfwords = *v; - } + // hw2 mask 0xFFE0 (not 0xF0F0) frees hw2[4:0] — TBH's H-bit at [4] and + // Rm at [3:0] — so TBH (0xF010) and TBB with Rm≠0 (e.g. 0xF004) are not + // disqualified; otherwise they fall through to STRD/LDRD or LDREX. + if ((hw1 & 0xFFF0) == 0xE8D0 && (hw2 & 0xFFE0) == 0xF000) { + return t32_tbb_tbh(hw1, hw2); + } - addr_t target = pc_val + halfwords * 2; - return write_pc(target); + // ── LDREX / STREX (+B/+H) — single-core sim, no exclusive monitor ── + // Plain LD; STREX always reports success (Rd=0). Must precede the + // STRD/LDRD mask (0xFE40==0xE840), which otherwise swallows them. 
+ if ((hw1 & 0xFF60u) == 0xE840u) { + return t32_ldrex_strex(hw1, hw2); } // ── STRD / LDRD (Store/Load Dual, immediate offset) ── if ((hw1 & 0xFE40) == 0xE840) { - bool P = (hw1 >> 8) & 1; - bool U = (hw1 >> 7) & 1; - bool W = (hw1 >> 5) & 1; - bool L = (hw1 >> 4) & 1; - uint8_t rn = hw1 & 0xF; - uint8_t rt = (hw2 >> 12) & 0xF; - uint8_t rt2 = (hw2 >> 8) & 0xF; - uint32_t offset = static_cast((hw2 & 0xFF)) * 4; - - uint32_t rn_val = rr(rn); - addr_t offset_addr = U ? (rn_val + offset) : (rn_val - offset); - addr_t addr = P ? offset_addr : rn_val; - - if (L) { - auto v1 = br(addr, Width::Word); - if (!v1) { - return std::unexpected{v1.error()}; - } - auto v2 = br(addr + 4, Width::Word); - if (!v2) { - return std::unexpected{v2.error()}; - } - auto w1 = wr(rt, *v1); - if (!w1) { - return w1; - } - auto w2 = wr(rt2, *v2); - if (!w2) { - return w2; - } - } else { - auto w1 = bw(addr, rr(rt), Width::Word); - if (!w1) { - return w1; - } - auto w2 = bw(addr + 4, rr(rt2), Width::Word); - if (!w2) { - return w2; - } - } - - if (W) { - return wr(rn, offset_addr); - } - return {}; + return t32_strd_ldrd(hw1, hw2); } // ── STM / LDM (Store/Load Multiple) ── if ((hw1 & 0xFE40) == 0xE800) { - bool U = (hw1 >> 7) & 1; - bool W = (hw1 >> 5) & 1; - bool L = (hw1 >> 4) & 1; - uint8_t rn = hw1 & 0xF; - uint16_t rlist = hw2; - - int count = std::popcount(rlist); - if (count == 0) { - return std::unexpected{CPUError::IllegalInstruction}; - } - - uint32_t rn_val = rr(rn); - bool decrement = !U; - addr_t start_addr = - decrement ? 
rn_val - static_cast(count * 4) : rn_val; - addr_t addr = start_addr; - - if (L) { - for (int i = 0; i < 16; i++) { - if (rlist & (1 << i)) { - auto v = br(addr, Width::Word); - if (!v) { - return std::unexpected{v.error()}; - } - if (i == 15) { - auto w = write_pc(*v); - if (!w) { - return w; - } - } else { - auto w = wr(i, *v); - if (!w) { - return w; - } - } - addr += 4; - } - } - } else { - for (int i = 0; i < 16; i++) { - if (rlist & (1 << i)) { - data_t val = (i == 15) ? (rr(15) + 4) : rr(i); - auto w = bw(addr, val, Width::Word); - if (!w) { - return w; - } - addr += 4; - } - } - } - - if (W) { - uint32_t new_rn = decrement - ? rn_val - static_cast(count * 4) - : rn_val + static_cast(count * 4); - return wr(rn, new_rn); - } - return {}; + return t32_stm_ldm(hw1, hw2); } return std::unexpected{CPUError::IllegalInstruction}; diff --git a/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp b/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp new file mode 100644 index 0000000..b679aa7 --- /dev/null +++ b/src/arch/arm/cortex_m3/cortex_m3_thumb32_dataproc.cpp @@ -0,0 +1,345 @@ +#include "arch/arm/cortex_m3/cortex_m3.hpp" +#include "arch/arm/cortex_m3/thumb32_fields.hpp" + +#include + +namespace micro_forge::cpu::arm::cortex_m3 { + +using namespace thumb; + +// ── Add/subtract (plain imm12): insn[25]=1 (hw1[9]) ── +// addw/subw (S=0) and adds.w/subs.w (S=1). imm12 = i:imm3:imm8 packed. +// PLAIN (not Thumb2ExpandImm). op = hw1[8:5]: 0000=ADD, 0101=SUB; S=hw1[4]. +// Dispatched when (hw1 & 0xF800)==0xF000 && (hw1 & 0x0200)!=0 && (hw2 & 0x8000)==0. +CPU::CPUExpected CortexM3CPU::t32_addsub_plain_imm(uint16_t hw1, + uint16_t hw2) { + uint8_t op = (hw1 >> 5) & 0xF; + bool s_bit = (hw1 >> 4) & 1; + uint8_t rn = hw1 & 0xF; + uint8_t rd = (hw2 >> 8) & 0xF; // Rd = hw2[11:8]; hw2[14:12] is imm3, part of imm12 below. + uint32_t imm12 = (((hw1 >> 10) & 0x1u) << 11) | + (((hw2 >> 12) & 0x7u) << 8) | (hw2 & 0xFFu); + uint32_t a; + if (rn == 15) { + // ADDW/SUBW with Rn=PC is ADR.W: base = Align(PC+4,4), not raw PC.
+ auto pc_res = read_pc_raw(); + if (!pc_res) { + return std::unexpected{pc_res.error()}; + } + a = (*pc_res + 4) & ~0x3u; + } else { + a = rr(rn); + } + uint32_t result; + bool is_sub; + switch (op) { + case 0x0: + is_sub = false; + result = a + imm12; + break; // ADD.W/addw + case 0x5: + is_sub = true; + result = a - imm12; + break; // SUB.W/subw + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + if (s_bit) { + update_flags(is_sub ? FlagPostOperation::Sub + : FlagPostOperation::Add, + a, imm12, result); + } + return wr(rd, result); +} + +// ── Data processing (modified immediate): insn[25]=0 ── +// Dispatched when (hw1 & 0xF800)==0xF000 && (hw1 & 0x0200)==0 && (hw2 & 0x8000)==0. +CPU::CPUExpected CortexM3CPU::t32_dataproc_imm(uint16_t hw1, uint16_t hw2) { + uint8_t op2 = (hw1 >> 5) & 0xF; + bool s_bit = (hw1 >> 4) & 1; + uint8_t rn = thumb32::dp_rn(hw1); + uint8_t rd = thumb32::dp_rd(hw2); + uint32_t imm32 = + thumb32::expand_imm12((hw1 >> 10) & 1, (hw2 >> 12) & 7, hw2 & 0xFF); + uint32_t rn_val = rr(rn); + uint32_t result; + + switch (op2) { + case 0: + result = rn_val & imm32; + break; // AND + case 1: + result = rn_val & ~imm32; + break; // BIC + case 2: + result = (rn == 15) ? imm32 : (rn_val | imm32); + break; // ORR/MOV + case 3: + result = (rn == 15) ? ~imm32 : (rn_val | ~imm32); + break; // ORN/MVN + case 4: + result = rn_val ^ imm32; + break; // EOR + case 8: + result = rn_val + imm32; + break; // ADD + case 10: + result = rn_val + imm32 + ((xpsr_ & PSR_C) ? 1u : 0u); + break; // ADC + case 11: { + uint32_t borrow = (xpsr_ & PSR_C) ? 
0u : 1u; + result = rn_val - imm32 - borrow; + break; // SBC + } + case 13: + result = rn_val - imm32; + break; // SUB + case 14: + result = imm32 - rn_val; + break; // RSB + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + + if (s_bit) { + if (op2 == 8) { // ADD + update_flags(FlagPostOperation::Add, rn_val, imm32, result); + } else if (op2 == 10) { // ADC + update_flags(FlagPostOperation::Add, rn_val, + imm32 + ((xpsr_ & PSR_C) ? 1u : 0u), result); + } else if (op2 == 11) { // SBC = rn - imm - !C + update_flags(FlagPostOperation::Sub, rn_val, + imm32 + ((xpsr_ & PSR_C) ? 0u : 1u), result); + } else if (op2 == 13) { // SUB = rn - imm + update_flags(FlagPostOperation::Sub, rn_val, imm32, result); + } else if (op2 == 14) { // RSB = imm - rn; minuend is the immediate. + update_flags(FlagPostOperation::Sub, imm32, rn_val, result); + } else { + update_nz(result); + } + } + // CMP/CMN/TST/TEQ: S=1, Rd=15 → flags only, no register write + if (s_bit && rd == 15) { + return {}; + } + return wr(rd, result); +} + +// ── Data processing (shifted register): AND, ORR, EOR, ADD, SUB, etc. ── +// Dispatched when (hw1 & 0xFE00)==0xEA00 && (hw2 & 0x8000)==0. +CPU::CPUExpected CortexM3CPU::t32_dataproc_reg(uint16_t hw1, uint16_t hw2) { + uint8_t op = (hw1 >> 5) & 0xF; + bool s_bit = (hw1 >> 4) & 1; + uint8_t rn = hw1 & 0xF; + uint8_t rd = (hw2 >> 8) & 0xF; + uint8_t rm = hw2 & 0xF; + uint8_t imm3 = (hw2 >> 12) & 0x7; + uint8_t imm2 = (hw2 >> 6) & 0x3; + uint8_t shift_type = (hw2 >> 4) & 0x3; + uint8_t shift_n = (imm3 << 2) | imm2; + + uint32_t rm_val = rr(rm); + + uint32_t shifted; + switch (shift_type) { + case 0: + shifted = shift_n == 0 ? rm_val : rm_val << shift_n; + break; + case 1: // LSR; imm3:imm2==0 means shift-by-32 → result 0. + shifted = shift_n == 0 ? 0u : (rm_val >> shift_n); + break; + case 2: { // ASR; imm3:imm2==0 means shift-by-32 → sign-extend. + if (shift_n == 0) { + shifted = (rm_val & 0x80000000u) ? 
0xFFFFFFFFu : 0u; + } else { + uint32_t sign = rm_val & 0x80000000u; + shifted = rm_val >> shift_n; + if (sign) { + shifted |= (0xFFFFFFFFu << (32 - shift_n)); + } + } + break; + } + case 3: { // ROR; shift_n==0 means RRX (rotate-right-extend via C). + if (shift_n == 0) { + bool carry_in = (xpsr_ & PSR_C) != 0; + shifted = (carry_in ? 0x80000000u : 0u) | (rm_val >> 1); + } else { + uint8_t n = shift_n & 0x1Fu; + shifted = (rm_val >> n) | (rm_val << (32 - n)); + } + break; + } + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + + uint32_t rn_val = rr(rn); + uint32_t result; + switch (op) { + case 0: + result = rn_val & shifted; + break; + case 1: + result = rn_val & ~shifted; + break; + case 2: + result = (rn == 15) ? shifted : (rn_val | shifted); + break; + case 3: // ORN = Rn | ~shifted; Rn=15 collapses to MVN (~shifted). + result = (rn == 15) ? ~shifted : (rn_val | ~shifted); + break; + case 4: + result = rn_val ^ shifted; + break; + case 8: + result = rn_val + shifted; + break; + case 13: + result = rn_val - shifted; + break; + case 14: + result = shifted - rn_val; + break; + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + + if (s_bit) { + if (op == 8) { // ADD + update_flags(FlagPostOperation::Add, rn_val, shifted, result); + } else if (op == 13) { // SUB = rn - shifted + update_flags(FlagPostOperation::Sub, rn_val, shifted, result); + } else if (op == 14) { // RSB = shifted - rn; minuend is the operand. + update_flags(FlagPostOperation::Sub, shifted, rn_val, result); + } else { + update_nz(result); + } + } + // CMP/CMN/TST/TEQ: S=1, Rd=15 → flags only, no register write. + if (s_bit && rd == 15) { + return {}; + } + return wr(rd, result); +} + +// ── Shift register (LSL/LSR/ASR/ROR register) ── +// Dispatched when (hw1 & 0xFF00)==0xFA00 && (hw2 & 0xF0F0)==0xF000. 
+CPU::CPUExpected CortexM3CPU::t32_shift_reg(uint16_t hw1, uint16_t hw2) { + uint8_t rn = hw1 & 0xF; + bool s_bit = (hw1 >> 4) & 1; + uint8_t shift_type = (hw1 >> 5) & 0x3; + uint8_t rd = (hw2 >> 8) & 0xF; + uint8_t rm = hw2 & 0xF; + + uint32_t value = rr(rn); + uint8_t amount = rr(rm) & 0xFFu; + auto [result, carry] = + barrel_shift(shift_type, value, amount, (xpsr_ & PSR_C) != 0); + + auto w = wr(rd, result); + if (!w) { + return w; + } + if (s_bit) { + update_nz(result); + if (carry) { + xpsr_ |= PSR_C; + } else { + xpsr_ &= ~PSR_C; + } + } + return {}; +} + +// ── CLZ / RBIT / REV.W / REV16.W / REVSH.W ── +// Dispatched when (hw1 & 0xFF00)==0xFA00 && (hw2 & 0xF000)==0xF000 && +// (hw2 & 0x00F0)!=0 (op2 present; shift_reg handles op2==0). +CPU::CPUExpected CortexM3CPU::t32_misc_reverse(uint16_t hw1, + uint16_t hw2) { + uint8_t rd = (hw2 >> 8) & 0xFu; + uint8_t rn = hw1 & 0xFu; + uint8_t op2 = (hw2 >> 4) & 0xFu; + uint32_t v = rr(rn); + + // CLZ (hw1[7:4]=0xB, op2=8) vs REV.W (hw1[7:4]=0x9, op2=8). 
+ if (op2 == 0x8u && (hw1 & 0x00F0u) == 0x00B0u) { + return wr(rd, std::countl_zero(v)); + } + uint32_t result; + switch (op2) { + case 0x8u: // REV.W — byte-reverse (same result as 16-bit REV) + result = ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) | + ((v & 0x00FF0000u) >> 8) | ((v & 0xFF000000u) >> 24); + break; + case 0x9u: // REV16.W — two halfword byte-swaps + result = ((v & 0x00FF00FFu) << 8) | ((v & 0xFF00FF00u) >> 8); + break; + case 0xAu: { // RBIT — bit-reverse all 32 bits + v = ((v & 0x55555555u) << 1) | ((v & 0xAAAAAAAAu) >> 1); + v = ((v & 0x33333333u) << 2) | ((v & 0xCCCCCCCCu) >> 2); + v = ((v & 0x0F0F0F0Fu) << 4) | ((v & 0xF0F0F0F0u) >> 4); + v = ((v & 0x00FF00FFu) << 8) | ((v & 0xFF00FF00u) >> 8); + result = (v << 16) | (v >> 16); + break; + } + case 0xBu: { // REVSH.W — sign-extend low halfword byte-swap + uint32_t r = ((v & 0x00FFu) << 8) | ((v & 0xFF00u) >> 8); + result = static_cast(static_cast( + static_cast(r & 0xFFFFu))); + break; + } + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + return wr(rd, result); +} + +// ── SSAT / USAT (saturate; writes APSR.Q on saturation) ── +// Dispatched when (hw1 & 0xFFD0)==0xF300 (SSAT) / 0xF380 (USAT). +CPU::CPUExpected CortexM3CPU::t32_ssat_usat(uint16_t hw1, uint16_t hw2) { + bool is_usat = (hw1 & 0x0080u) != 0u; // bit7: SSAT=0, USAT=1 + bool asr = (hw1 & 0x0020u) != 0u; // bit5: shift type (1=asr, 0=lsl) + uint8_t rn = hw1 & 0xFu; + uint8_t rd = (hw2 >> 8) & 0xFu; + uint8_t field = hw2 & 0x1Fu; // sat width field + uint8_t imm3 = (hw2 >> 12) & 0x7u; + uint8_t imm2 = (hw2 >> 6) & 0x3u; + uint8_t shift = (imm3 << 2) | imm2; + + int32_t val = static_cast(rr(rn)); + val = asr ? 
(val >> shift) + : static_cast(static_cast(val) << shift); + int64_t v = val; + + if (is_usat) { + int64_t hi = (1ll << field) - 1; // USAT range [0, 2^field - 1] + uint32_t result; + if (v < 0) { + result = 0u; + xpsr_ |= PSR_Q; + } else if (v > hi) { + result = static_cast(hi); + xpsr_ |= PSR_Q; + } else { + result = static_cast(v); + } + return wr(rd, result); + } + // SSAT range [-2^(field), 2^(field) - 1] (sat = field + 1). + int64_t lo = -(1ll << field); + int64_t hi = (1ll << field) - 1; + uint32_t result; + if (v < lo) { + result = static_cast(static_cast(lo)); + xpsr_ |= PSR_Q; + } else if (v > hi) { + result = static_cast(hi); + xpsr_ |= PSR_Q; + } else { + result = static_cast(v); + } + return wr(rd, result); +} + +} // namespace micro_forge::cpu::arm::cortex_m3 diff --git a/src/arch/arm/cortex_m3/cortex_m3_thumb32_loadstore.cpp b/src/arch/arm/cortex_m3/cortex_m3_thumb32_loadstore.cpp new file mode 100644 index 0000000..e1a7803 --- /dev/null +++ b/src/arch/arm/cortex_m3/cortex_m3_thumb32_loadstore.cpp @@ -0,0 +1,324 @@ +#include "arch/arm/cortex_m3/cortex_m3.hpp" +#include "arch/arm/cortex_m3/thumb32_fields.hpp" + +#include +#include + +namespace micro_forge::cpu::arm::cortex_m3 { + +using namespace thumb; + +// ── Load/Store single data item (.W): str/ldr/strb/ldrb/strh/ldrh/ldrsb/ldrsh .W ── +// hw1[7] selects the offset form: +// 1 → imm12 offset (T2/T3): addr = Rn + imm12, no writeback. +// 0 → op = hw2[11:8]: 0 = register offset [Rn, Rm, LSL #imm2]; otherwise imm8 +// addressing modes: C=offset-, B=post+, 9=post-, F=pre+, D=pre-. +// Dispatched from execute_32bit when (hw1 & 0xFE00) == 0xF800 (0xF8xx, plus signed-load 0xF9xx).
+CPU::CPUExpected CortexM3CPU::t32_loadstore_single(uint16_t hw1, + uint16_t hw2) { + uint8_t rn = hw1 & 0xF; + bool load = (hw1 >> 4) & 1; + uint8_t size = (hw1 >> 5) & 0x3; + uint8_t rt = (hw2 >> 12) & 0xF; + Width width; + switch (size) { + case 0: + width = Width::Byte; + break; + case 1: + width = Width::HalfWord; + break; + case 2: + width = Width::Word; + break; + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + + // hw1[8]=1 selects the 0xF9xx signed forms (LDRSB.W/LDRSH.W); the load + // path sign-extends. Signed stores do not exist. + bool sign = (hw1 >> 8) & 1; + if (sign && !load) { + return std::unexpected{CPUError::IllegalInstruction}; + } + + // Effective base register value. Rn=PC is the literal form + // `ldr.w Rt, [pc, #±imm12]` (literal pool, compiled `LDR Rd, =const`): + // base = Align(PC+4,4), hw1[7] = U. Store-to-PC-relative is UNDEFINED. + uint32_t base; + if (rn == 15) { + auto pc_res = read_pc_raw(); + if (!pc_res) { + return std::unexpected{pc_res.error()}; + } + base = (*pc_res + 4) & ~0x3u; + } else { + base = rr(rn); + } + + // Resolve effective address + optional writeback per form. + // hw1[7]=1 → imm12 offset (T2/T3), no writeback; Rn=PC is always imm12 (hw1[7]=U). + // hw1[7]=0, hw2[11:8]=0 → register offset [Rn, Rm, LSL #imm2] (T2). + // hw1[7]=0, hw2[11:8]≠0 → imm8 addressing modes (T4): C/B/9/F/D. + // (Per objdump: a positive small offset always folds into imm12, so the + // hw2[11:8]=0 slot is exclusively the register-offset form.) + addr_t addr = 0; + bool writeback = false; + data_t wb_val = 0; + if (((hw1 >> 7) & 1) || rn == 15) { + // imm12 offset form (no writeback). Literal (Rn=PC): hw1[7]=U picks +imm12 / -imm12; stores are UNDEFINED. + if (rn == 15 && !load) { + return std::unexpected{CPUError::IllegalInstruction}; + } + addr = ((hw1 >> 7) & 1) ? base + (hw2 & 0xFFFu) : base - (hw2 & 0xFFFu); + } else { + uint8_t op = (hw2 >> 8) & 0xF; + if (op == 0x0) { + // Register offset: [Rn, Rm, LSL #imm2], no writeback.
+ uint8_t rm = hw2 & 0xF; + uint8_t shift = (hw2 >> 4) & 0x3; + addr = base + (rr(rm) << shift); + } else { + // imm8 with addressing modes (T4). + uint32_t imm8 = hw2 & 0xFFu; + switch (op) { + case 0xC: // [Rn, #-imm8] + addr = base - imm8; + break; + case 0xB: // [Rn], #+imm8 (post-index) + addr = base; + wb_val = base + imm8; + writeback = true; + break; + case 0x9: // [Rn], #-imm8 (post-index) + addr = base; + wb_val = base - imm8; + writeback = true; + break; + case 0xF: // [Rn, #+imm8]! (pre-index) + addr = base + imm8; + wb_val = addr; + writeback = true; + break; + case 0xD: // [Rn, #-imm8]! (pre-index) + addr = base - imm8; + wb_val = addr; + writeback = true; + break; + default: + return std::unexpected{CPUError::IllegalInstruction}; + } + } + } + + if (load) { + auto v = br(addr, width); + if (!v) { + return std::unexpected{v.error()}; + } + data_t val = *v; + if (sign) { // LDRSB.W / LDRSH.W: sign-extend byte/half to 32 bits. + val = (width == Width::Byte) + ? static_cast( + static_cast(static_cast(val))) + : static_cast( + static_cast(static_cast(val))); + } + auto w = wr(rt, val); + if (!w) { + return w; + } + } else { + auto w = bw(addr, rr(rt), width); + if (!w) { + return w; + } + } + if (writeback) { + return wr(rn, wb_val); + } + return {}; +} + +// ── TBB / TBH (Table Branch) ── +// Dispatched when (hw1 & 0xFFF0) == 0xE8D0 && (hw2 & 0xFFE0) == 0xF000. +// (hw2[4] is the H bit selecting TBH and hw2[3:0] is Rm; both lie outside the mask.) +CPU::CPUExpected CortexM3CPU::t32_tbb_tbh(uint16_t hw1, uint16_t hw2) { + uint8_t rn = hw1 & 0xF; + uint8_t rm = hw2 & 0xF; + bool H = (hw2 >> 4) & 1; + + uint32_t pc_val = rr(15) + 4; + uint32_t base = (rn == 15) ? pc_val : rr(rn); + uint32_t index = (rm == 15) ?
0u : rr(rm); + + uint32_t halfwords; + if (H) { + auto v = br(base + index * 2, Width::HalfWord); + if (!v) { + return std::unexpected{v.error()}; + } + halfwords = *v; + } else { + auto v = br(base + index, Width::Byte); + if (!v) { + return std::unexpected{v.error()}; + } + halfwords = *v; + } + + addr_t target = pc_val + halfwords * 2; + return write_pc(target); +} + +// ── LDREX / STREX (+B/+H) — single-core sim, no exclusive monitor ── +// Dispatched when (hw1 & 0xFF60)==0xE840: the exclusive space (P=0 & W=0), +// which STRD/LDRD never occupy (those always set P or W). LDREX → plain load; +// STREX → plain store with Rd=0 (no monitor → always "succeeds"). +// hw1[4]=L: STREX(0) / LDREX(1). hw1[7]: 0=word, 1=byte/half. +// word: LDREX Rd=hw2[15:12]; STREX Rt=hw2[15:12], Rd=hw2[11:8]; addr=Rn+imm8*4 (imm8=hw2[7:0]). +// byte/h: LDREX Rd=hw2[15:12]; STREX Rt=hw2[15:12], Rd=hw2[3:0]; size=hw2[7:4]. +CPU::CPUExpected CortexM3CPU::t32_ldrex_strex(uint16_t hw1, + uint16_t hw2) { + uint8_t rn = hw1 & 0xFu; + bool load = (hw1 >> 4) & 1u; + bool byte_half = (hw1 >> 7) & 1u; + + Width width = Width::Word; + if (byte_half) { + width = (((hw2 >> 4) & 0xFu) == 5u) ? Width::HalfWord : Width::Byte; + } + uint32_t base = rr(rn) + (byte_half ? 0u : ((hw2 & 0xFFu) << 2)); // word forms address [Rn, #imm8*4]; byte/half have no offset + + if (load) { + uint8_t rd = (hw2 >> 12) & 0xFu; + auto v = br(base, width); + if (!v) { + return std::unexpected{v.error()}; + } + return wr(rd, *v); + } + // STREX: store Rt, write success status Rd=0. + uint8_t rt = (hw2 >> 12) & 0xFu; + uint8_t rd = byte_half ? (hw2 & 0xFu) : ((hw2 >> 8) & 0xFu); + auto w = bw(base, rr(rt), width); + if (!w) { + return w; + } + return wr(rd, 0u); +} + +// ── STRD / LDRD (Store/Load Dual, immediate offset) ── +// Dispatched when (hw1 & 0xFE40) == 0xE840.
+CPU::CPUExpected CortexM3CPU::t32_strd_ldrd(uint16_t hw1, uint16_t hw2) { + bool P = (hw1 >> 8) & 1; + bool U = (hw1 >> 7) & 1; + bool W = (hw1 >> 5) & 1; + bool L = (hw1 >> 4) & 1; + uint8_t rn = hw1 & 0xF; + uint8_t rt = (hw2 >> 12) & 0xF; + uint8_t rt2 = (hw2 >> 8) & 0xF; + uint32_t offset = static_cast((hw2 & 0xFF)) * 4; + + uint32_t rn_val = rr(rn); + addr_t offset_addr = U ? (rn_val + offset) : (rn_val - offset); + addr_t addr = P ? offset_addr : rn_val; + + if (L) { + auto v1 = br(addr, Width::Word); + if (!v1) { + return std::unexpected{v1.error()}; + } + auto v2 = br(addr + 4, Width::Word); + if (!v2) { + return std::unexpected{v2.error()}; + } + auto w1 = wr(rt, *v1); + if (!w1) { + return w1; + } + auto w2 = wr(rt2, *v2); + if (!w2) { + return w2; + } + } else { + auto w1 = bw(addr, rr(rt), Width::Word); + if (!w1) { + return w1; + } + auto w2 = bw(addr + 4, rr(rt2), Width::Word); + if (!w2) { + return w2; + } + } + + if (W) { + return wr(rn, offset_addr); + } + return {}; +} + +// ── STM / LDM (Store/Load Multiple) ── +// Dispatched when (hw1 & 0xFE40) == 0xE800. +CPU::CPUExpected CortexM3CPU::t32_stm_ldm(uint16_t hw1, uint16_t hw2) { + bool U = (hw1 >> 7) & 1; + bool W = (hw1 >> 5) & 1; + bool L = (hw1 >> 4) & 1; + uint8_t rn = hw1 & 0xF; + uint16_t rlist = hw2; + + int count = std::popcount(rlist); + if (count == 0) { + return std::unexpected{CPUError::IllegalInstruction}; + } + + uint32_t rn_val = rr(rn); + bool decrement = !U; + addr_t start_addr = + decrement ? rn_val - static_cast(count * 4) : rn_val; + addr_t addr = start_addr; + + if (L) { + for (int i = 0; i < 16; i++) { + if (rlist & (1 << i)) { + auto v = br(addr, Width::Word); + if (!v) { + return std::unexpected{v.error()}; + } + if (i == 15) { + auto w = write_pc(*v); + if (!w) { + return w; + } + } else { + auto w = wr(i, *v); + if (!w) { + return w; + } + } + addr += 4; + } + } + } else { + for (int i = 0; i < 16; i++) { + if (rlist & (1 << i)) { + data_t val = (i == 15) ? 
(rr(15) + 4) : rr(i); + auto w = bw(addr, val, Width::Word); + if (!w) { + return w; + } + addr += 4; + } + } + } + + if (W) { + uint32_t new_rn = decrement + ? rn_val - static_cast(count * 4) + : rn_val + static_cast(count * 4); + return wr(rn, new_rn); + } + return {}; +} + +} // namespace micro_forge::cpu::arm::cortex_m3 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 51b27d0..93c51d7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -171,6 +171,19 @@ if(TARGET hello_firmware AND TARGET blink_firmware AND TARGET systick_firmware) gtest_discover_tests(test_e2e) endif() +# ── armcc/AC6 固件语料 E2E(T5c)── +# 预编译 .axf fixture 提交进 test/firmware/armcc/;CI 无 Keil 故不重建。 +add_executable(test_firmware_armcc + test_firmware_armcc.cpp +) +target_link_libraries(test_firmware_armcc + PRIVATE micro_forge GTest::gtest_main +) +target_compile_definitions(test_firmware_armcc PRIVATE + ARMCC_FW_DIR="${CMAKE_CURRENT_SOURCE_DIR}/firmware/armcc" +) +gtest_discover_tests(test_firmware_armcc) + # ── CLI 测试(drives the `micro-forge` binary) ── if(TARGET micro-forge AND TARGET hello_firmware) add_executable(test_cli diff --git a/test/firmware/armcc/REGENERATE.md b/test/firmware/armcc/REGENERATE.md new file mode 100644 index 0000000..82a687b --- /dev/null +++ b/test/firmware/armcc/REGENERATE.md @@ -0,0 +1,58 @@ +# armcc/AC6 固件语料 — 重生成指南 (T5c) + +`test/firmware/armcc/*.ac6.axf` 是 STM32CubeF1 **STM32F103RB-Nucleo** 示例在 +**Keil MDK + Arm Compiler 6 (armclang)** 下预编译的 ELF fixture,作为 ctest E2E +回归门禁。CI 无 Keil,故二进制提交进仓库;仅在需要刷新 codegen 时本地重编。 + +## 当前语料 + +| fixture | CubeF1 示例 | 覆盖 | +|---|---|---| +| `nucleo_f103rb_tim_timebase.ac6.axf` | TIM/TIM_TimeBase | TIM UIF / 中断 | +| `nucleo_f103rb_uart_printf.ac6.axf` | UART/UART_Printf | USART TX (printf 重定向) | +| `nucleo_f103rb_gpio_iotoggle.ac6.axf` | GPIO/GPIO_IOToggle | GPIO 翻转 | + +每份均 `ELF32 / ARM / entry=0x80000ed`,与 Keil F103.axf 同款,`Stm32f103Soc::load_elf` 直接加载。 + +## 环境 + +- Keil MDK + AC6 (armclang) + **STM32F1xx_DFP 
2.4.1** + CMSIS 6.x +- Pack 仓库自定义在 `D:\MDK-Pack`;UV4 在 `D:\MDK\UV4\UV4.exe` +- headless 构建:`UV4.exe -b <project.uvprojx> -o <log>`(退出码 0=干净 / 1=警告但成功出 .axf / 2+=错) + +## 为什么不能在 WSL 文件系统上编 + +Keil 的编译器响应文件(`.__i`)在 WSL fs(9p,经 `\\wsl.localhost` 或映射盘符)上 +**创建失败**。必须编在**本地 NTFS**。故把最小子树复制到 `D:\mf\STM32CubeF1\` +(保留 `..\..\..\..\..\Drivers` 的相对目录深度),在那里编。 + +## CubeF1 AC5→AC6 迁移补丁(每个 .uvprojx 必做 3 处) + +CubeF1 示例工程是 AC5 配置;新版 MDK 只有 AC6,需三处补丁: + +1. **选 AC6** — 在 `<Target>` 层(紧跟 `<ToolsetName>ARM-ADS</ToolsetName>` 之后) + 插入 `<uAC6>1</uAC6>`。(注意:必须在 `<Target>` 直接子级,不是 `<TargetOption>` 内。) +2. **删 `--C99`** — `<MiscControls>--C99</MiscControls>` 清空 + (AC5 flag,AC6 不认;C99 由 AC6 默认提供)。 +3. **删语言标准字段** — 删 ``/``/``。 + 否则 GUI 切 AC6 时 Keil 写入的值会强制 C90(`inline` 报错);删掉走 AC6 默认 gnu11。 + +GUI 等价操作:Options for Target → Target 页 → ARM Compiler 选 **AC6**; +C/C++ 页 → Language C 选 **gnu11**(勿选 C90);不要勾 AC5 的 C99 mode。 + +## 重编步骤 + +1. 复制最小子树到 `D:\mf\STM32CubeF1\`: + `Drivers/{STM32F1xx_HAL_Driver, BSP, CMSIS}`(CMSIS 可去 DSP)+ 选定的 `Projects/STM32F103RB-Nucleo/Examples/`。 +2. 对每个 `.uvprojx` 应用上述 3 补丁。 +3. 跑 `D:\mf\build_corpus.bat`(遍历编 GPIO/TIM/UART)。 +4. 把产出的 `MDK-ARM\STM32F103RB_Nucleo\STM32F103RB_Nucleo.axf` 拷成 + `test/firmware/armcc/nucleo_f103rb_<example>.ac6.axf`。 +5.
新增示例:在 `test/test_firmware_armcc.cpp` 加一个 + `TEST(FirmwareArmcc, BootsClean)`。 + +## 常见坑 + +- **WSL→Windows interop 偶发挂**(`exec format error`):挂了就在 Windows 原生跑 `.bat`(双击或 PowerShell)。`/mnt/d/mf` 从 WSL 仍可正常读写,故编完拷回不阻塞。 +- **免费版(无 license)** 32KB 代码大小限制;Nucleo 小示例远低于此。 +- 第三方 `third_party/STM32CubeF1` 的 `.uvprojx` 补丁**不入库**(vendored 子模块保持干净);本文件即权威配方。 diff --git a/test/firmware/armcc/nucleo_f103rb_gpio_iotoggle.ac6.axf b/test/firmware/armcc/nucleo_f103rb_gpio_iotoggle.ac6.axf new file mode 100755 index 0000000..fd9f3a0 Binary files /dev/null and b/test/firmware/armcc/nucleo_f103rb_gpio_iotoggle.ac6.axf differ diff --git a/test/firmware/armcc/nucleo_f103rb_tim_timebase.ac6.axf b/test/firmware/armcc/nucleo_f103rb_tim_timebase.ac6.axf new file mode 100755 index 0000000..ab0b335 Binary files /dev/null and b/test/firmware/armcc/nucleo_f103rb_tim_timebase.ac6.axf differ diff --git a/test/firmware/armcc/nucleo_f103rb_uart_printf.ac6.axf b/test/firmware/armcc/nucleo_f103rb_uart_printf.ac6.axf new file mode 100755 index 0000000..d6eb8e7 Binary files /dev/null and b/test/firmware/armcc/nucleo_f103rb_uart_printf.ac6.axf differ diff --git a/test/test_cortex_m3_advanced.cpp b/test/test_cortex_m3_advanced.cpp index c287266..2e4c575 100644 --- a/test/test_cortex_m3_advanced.cpp +++ b/test/test_cortex_m3_advanced.cpp @@ -192,6 +192,56 @@ TEST_F(CortexM3Test, StrbWidePreIndexNegative) { EXPECT_EQ(*v, 0x77u); } +TEST_F(CortexM3Test, LoadStoreWideRegisterOffset) { + // Register offset (T2): addr = Rn + (Rm << shift2). The #9 bug treated + // hw2[7:0] as imm8, computing r1+2 instead of r1+r2. 
+ // str.w r0,[r1,r2] = F841 0002 → r1 + r2 + // ldr.w r4,[r1,r2,lsl#3] = F851 4032 → r1 + (r2<<3) + load_program({0xF841, 0x0002, 0xF851, 0x4032}); + reset_cpu(); + set_reg(1, 0x100u); + set_reg(2, 0x40u); + set_reg(0, 0x12345678u); + uint32_t lit = 0xCAFEF00Du; + ASSERT_TRUE(mem_.load(0x300u, {reinterpret_cast(&lit), 4}) + .has_value()); + start_cpu(); + + ASSERT_TRUE(cpu_->step().has_value()); // str.w r0,[r1,r2] → 0x140 + auto w = bus_.read(0x140u, Width::Word); + ASSERT_TRUE(w.has_value()); + EXPECT_EQ(*w, 0x12345678u); + // Regression guard: the buggy imm8 path wrote to r1+2 = 0x102 instead. + auto bad = bus_.read(0x102u, Width::Word); + ASSERT_TRUE(bad.has_value()); + EXPECT_NE(*bad, 0x12345678u); + + ASSERT_TRUE(cpu_->step().has_value()); // ldr.w r4,[r1,r2,lsl#3] → 0x300 + EXPECT_EQ(reg(4), 0xCAFEF00Du); +} + +TEST_F(CortexM3Test, LoadStoreWideRegisterOffsetByteHalf) { + // strb.w r0,[r1,r2] = F801 0002 (byte → r1+r2) + // ldrh.w r4,[r1,r3] = F831 4003 (half ← r1+r3) + load_program({0xF801, 0x0002, 0xF831, 0x4003}); + reset_cpu(); + set_reg(1, 0x100u); + set_reg(2, 0x10u); // strb → 0x110 + set_reg(3, 0x40u); // ldrh → 0x140 + set_reg(0, 0xABu); + uint16_t lit = 0xBEEFu; + ASSERT_TRUE(mem_.load(0x140u, {reinterpret_cast(&lit), 2}) + .has_value()); + start_cpu(); + + ASSERT_TRUE(cpu_->step().has_value()); // strb.w → 0x110 + auto b = bus_.read(0x110u, Width::Byte); + ASSERT_TRUE(b.has_value()); + EXPECT_EQ(*b, 0xABu); + ASSERT_TRUE(cpu_->step().has_value()); // ldrh.w r4,[r1,r3] → 0x140 + EXPECT_EQ(reg(4), 0xBEEFu); +} + TEST_F(CortexM3Test, CmpWideShiftedRegDoesNotWritePc) { // 0xEBB0 0F81 = cmp.w r0, r1, lsl #2 (Rd=15, S=1 → flags only). // The F103 HAL_RCC_ClockConfig PLL-wait opcode. r0=10, r1=3 → 10-(3<<2)=-2. 
@@ -401,3 +451,224 @@ TEST_F(CortexM3Test, TbbUsesPcPlusFourAsBranchBase) { ASSERT_TRUE(cpu_->step().has_value()); EXPECT_EQ(reg(1), 2u); } + +// ── T1 silent-error fix unit tests (coverage matrix §2 #2–#11) ── + +TEST_F(CortexM3Test, OrnWideRegisterIncludesRn) { + // #2: orn.w r0,r1,r2 (ea61 0002) = r1 | ~r2; bug gave ~shifted (dropped Rn). + load_program({0xEA61, 0x0002}); + reset_cpu(); + set_reg(1, 0x000000FFu); + set_reg(2, 0x0000000Fu); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(0), 0xFFFFFFFFu); // 0xFF | ~0x0F +} + +TEST_F(CortexM3Test, RsbsWideCarryReflectsMinuend) { + // #3: rsbs r0,r1,#5 (f1d1 0005) = 5 - r1; r1=3 → C=1 (5>=3). mrs r2,apsr. + load_program({0xF1D1, 0x0005, 0xF3EF, 0x8200}); + reset_cpu(); + set_reg(1, 3u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // rsbs + ASSERT_TRUE(cpu_->step().has_value()); // mrs r2,apsr + EXPECT_EQ(reg(0), 2u); + EXPECT_EQ(reg(2), 0x20000000u); // C set; bug gave C=(rn>=imm)=0 +} + +TEST_F(CortexM3Test, ShiftBy32InShiftedRegisterOperand) { + // #4: add.w r0,r1,r2,lsr #32 (eb01 0012); LSR#32 → shifted=0. + load_program({0xEB01, 0x0012}); + reset_cpu(); + set_reg(1, 0x10u); + set_reg(2, 0xFFFFFFFFu); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(0), 0x10u); // r1 + 0; bug gave r1 + r2 +} + +TEST_F(CortexM3Test, CpsFControlsFaultMaskNotPrimask) { + // #5: cpsid f (b671) → FAULTMASK, not PRIMASK. + load_program({0xB671, 0xF3EF, 0x8013, 0xF3EF, 0x8110}); // cpsid f; mrs r0,faultmask; mrs r1,primask + reset_cpu(); + start_cpu(); + for (int i = 0; i < 3; ++i) { + ASSERT_TRUE(cpu_->step().has_value()); + } + EXPECT_EQ(reg(0), 1u); // faultmask set + EXPECT_EQ(reg(1), 0u); // primask untouched (bug had set primask) +} + +TEST_F(CortexM3Test, BkptIsNotSilentlyNopped) { + // #6: bkpt #5 (be05) → HardFault entry (PC leaves the bkpt), not a NOP + // that simply advances PC to 2.
+ load_program({0xBE05}); + reset_cpu(); + start_cpu(); + [[maybe_unused]] auto _ = cpu_->step(); + EXPECT_NE(cpu_->pc().value_or(0xDEAD), 2u); +} + +TEST_F(CortexM3Test, MulWideRa15DoesNotFoldPc) { + // #7: mul.w r0,r1,r2 (fb01 f002); Ra=15 → no accumulate; bug added raw PC. + load_program({0xFB01, 0xF002}); + reset_cpu(); + set_reg(1, 3u); + set_reg(2, 5u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(0), 15u); +} + +TEST_F(CortexM3Test, SubwPcUsesAlignedPcPlusFour) { + // #8: subw r0,pc,#4 (f2af 0004) at PC=0; base=Align(PC+4,4)=4 → r0=0. + // bug used raw PC=0 → r0=0xFFFFFFFC. + load_program({0xF2AF, 0x0004}); + reset_cpu(); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(reg(0), 0u); +} + +TEST_F(CortexM3Test, TbhDispatchesViaTableBranchHandler) { + // #10: tbh [pc,r0,lsl#1] (e8df f010); H-bit must not misroute to LDRD. + // r0=0, table halfword at PC+4 = 0 → target = PC+4. + load_program({0xE8DF, 0xF010}); + reset_cpu(); + set_reg(0, 0u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); + EXPECT_EQ(cpu_->pc().value_or(0), 4u); +} + +TEST_F(CortexM3Test, LdrexStrexBehaveAsPlainLoadStore) { + // #11: ldrex r0,[r1] (e851 0f00) → plain load; strex r3,r2,[r1] (e841 2300) + // → plain store + Rd=0 (no monitor → always succeeds). + load_program({0xE851, 0x0F00, 0xE841, 0x2300}); + uint32_t seed = 0xDEADBEEFu; + ASSERT_TRUE(mem_.load(0x100u, {reinterpret_cast(&seed), 4}) + .has_value()); + reset_cpu(); + set_reg(1, 0x100u); + set_reg(2, 0xCAFEu); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // ldrex + EXPECT_EQ(reg(0), 0xDEADBEEFu); + ASSERT_TRUE(cpu_->step().has_value()); // strex + EXPECT_EQ(reg(3), 0u); // success status + auto v = bus_.read(0x100u, Width::Word); + ASSERT_TRUE(v.has_value()); + EXPECT_EQ(*v, 0xCAFEu); +} + +// ── T2 missing-instruction unit tests (coverage matrix §3) ── + +TEST_F(CortexM3Test, OrnMvnWideImmediate) { + // orn.w r3,r1,#0x11 (f061 0311)=r1|~imm; mvn.w r0,#0x11 (f06f 0011)=~imm.
+ load_program({0xF061, 0x0311, 0xF06F, 0x0011}); + reset_cpu(); + set_reg(1, 0x000000FFu); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // orn → 0xFF | ~0x11 + EXPECT_EQ(reg(3), 0xFFFFFFFFu); + ASSERT_TRUE(cpu_->step().has_value()); // mvn → ~0x11 + EXPECT_EQ(reg(0), 0xFFFFFFEEu); +} + +TEST_F(CortexM3Test, RorRrxShiftedRegister) { + // mov.w r0,r1,ror#4 (ea4f 1031); msr apsr,r2 (set C); mov.w r3,r1,rrx (ea4f 0331). + load_program({0xEA4F, 0x1031, 0xF382, 0x8800, 0xEA4F, 0x0331}); + reset_cpu(); + set_reg(1, 0x12345678u); + set_reg(2, 0x20000000u); // PSR_C + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // ror#4 + EXPECT_EQ(reg(0), 0x81234567u); + ASSERT_TRUE(cpu_->step().has_value()); // msr sets C + ASSERT_TRUE(cpu_->step().has_value()); // rrx: (C<<31)|(r1>>1) + EXPECT_EQ(reg(3), 0x891A2B3Cu); +} + +TEST_F(CortexM3Test, SmlalUmlalWideAccumulate) { + // smlal r0,r1,r2,r3 (fbc2 0103); umlal r0,r1,r2,r3 (fbe2 0103). + // r2=r3=0xFFFFFFFF: signed product=1, unsigned=0xFFFFFFFE00000001. + load_program({0xFBC2, 0x0103, 0xFBE2, 0x0103}); + reset_cpu(); + set_reg(0, 0u); + set_reg(1, 0u); + set_reg(2, 0xFFFFFFFFu); + set_reg(3, 0xFFFFFFFFu); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // smlal: 0 + 1 + EXPECT_EQ(reg(0), 1u); + EXPECT_EQ(reg(1), 0u); + ASSERT_TRUE(cpu_->step().has_value()); // umlal: 1 + 0xFFFFFFFE00000001 + EXPECT_EQ(reg(0), 2u); + EXPECT_EQ(reg(1), 0xFFFFFFFEu); +} + +TEST_F(CortexM3Test, LdrsbLdrshWideSignExtend) { + // ldrsb.w r0,[r1,#4] (f991 0004); ldrsh.w r0,[r1,#8] (f9b1 0008). 
+ load_program({0xF991, 0x0004, 0xF9B1, 0x0008}); + uint8_t b = 0x80; + uint16_t h = 0x8000; + ASSERT_TRUE(mem_.load(0x104u, {&b, 1}).has_value()); + ASSERT_TRUE(mem_.load(0x108u, {reinterpret_cast(&h), 2}) + .has_value()); + reset_cpu(); + set_reg(1, 0x100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // ldrsb → 0xFFFFFF80 + EXPECT_EQ(reg(0), 0xFFFFFF80u); + ASSERT_TRUE(cpu_->step().has_value()); // ldrsh → 0xFFFF8000 + EXPECT_EQ(reg(0), 0xFFFF8000u); +} + +TEST_F(CortexM3Test, ClzRbitRevWide) { + // clz r0,r1 (fab1 f081); rbit r0,r1 (fa91 f0a1); rev.w r0,r1 (fa91 f081). + load_program({0xFAB1, 0xF081, 0xFA91, 0xF0A1, 0xFA91, 0xF081}); + reset_cpu(); + set_reg(1, 0x00010000u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // clz (bit16 → 15 leading zeros) + EXPECT_EQ(reg(0), 15u); + ASSERT_TRUE(cpu_->step().has_value()); // rbit (bit16 → bit15) + EXPECT_EQ(reg(0), 0x00008000u); + ASSERT_TRUE(cpu_->step().has_value()); // rev.w (byte-reverse) + EXPECT_EQ(reg(0), 0x00000100u); +} + +TEST_F(CortexM3Test, SsatUsatWideSaturation) { + // ssat r0,#5,r1 (f301 0004): 100→15,Q; usat r2,#5,r1 (f381 0205): 100→31,Q. + // mrs r3,apsr (f3ef 8300) reads Q. + load_program({0xF301, 0x0004, 0xF381, 0x0205, 0xF3EF, 0x8300}); + reset_cpu(); + set_reg(1, 100u); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // ssat → 15 + EXPECT_EQ(reg(0), 15u); + ASSERT_TRUE(cpu_->step().has_value()); // usat → 31 + EXPECT_EQ(reg(2), 31u); + ASSERT_TRUE(cpu_->step().has_value()); // mrs r3,apsr + EXPECT_NE(reg(3) & 0x08000000u, 0u); // Q set +} + +TEST_F(CortexM3Test, ClrexNopWideAreNoop) { + // clrex (f3bf 8f2f); nop.w (f3af 8000) — both advance PC, no fault. 
+ load_program({0xF3BF, 0x8F2F, 0xF3AF, 0x8000}); + reset_cpu(); + start_cpu(); + ASSERT_TRUE(cpu_->step().has_value()); // clrex PC 0→4 + EXPECT_EQ(cpu_->pc().value_or(0), 4u); + ASSERT_TRUE(cpu_->step().has_value()); // nop.w PC 4→8 + EXPECT_EQ(cpu_->pc().value_or(0), 8u); +} + +TEST_F(CortexM3Test, McrMrcCoprocessorFaults) { + // mrc p15 (ee11 0f10) → no coprocessor → IllegalInstruction. + load_program({0xEE11, 0x0F10}); + reset_cpu(); + start_cpu(); + EXPECT_FALSE(cpu_->step().has_value()); // faults +} diff --git a/test/test_cortex_m3_basic.cpp b/test/test_cortex_m3_basic.cpp index c47b522..b24ee90 100644 --- a/test/test_cortex_m3_basic.cpp +++ b/test/test_cortex_m3_basic.cpp @@ -223,3 +223,67 @@ TEST_F(CortexM3Test, CbzAndCbnzBranchWithoutTouchingStack) { EXPECT_EQ(reg(3), 9u); EXPECT_EQ(reg(13), 0x200u); } + +// ── Shift Carry flag (T1a): ARMv7-M shift instructions update C from the +// shifter carry-out. We read flags via `MRS R0, APSR` (0xF3EF 0x8000); the +// C flag is APSR bit 29. ── + +TEST_F(CortexM3Test, LslImmediateSetsCarryFlag) { + // LSLS R1, R2, #1 (0x0051): 0x80000000<<1 = 0, C = shifted-out bit31 = 1. + load_program({0x0051, 0xF3EF, 0x8000}); // LSLS R1,R2,#1 ; MRS R0,APSR + reset_cpu(); + start_cpu(); + set_reg(2, 0x80000000u); + step_cpu(); // LSLS + step_cpu(); // MRS (32-bit, one step) + EXPECT_NE(reg(0) & (1u << 29), 0u) << "LSL #1 of 0x80000000 must set C"; +} + +TEST_F(CortexM3Test, LsrImmediateSetsCarryFlag) { + // LSRS R1, R2, #1 (0x0851): 1>>1 = 0, C = shifted-out bit0 = 1. + load_program({0x0851, 0xF3EF, 0x8000}); + reset_cpu(); + start_cpu(); + set_reg(2, 1u); + step_cpu(); + step_cpu(); + EXPECT_NE(reg(0) & (1u << 29), 0u) << "LSR #1 of 1 must set C"; +} + +TEST_F(CortexM3Test, AsrBy32CarryAndSignExtend) { + // ASRS R1, R2 (0x1011, imm5=0 → ASR #32): 0x80000000 → 0xFFFFFFFF, + // C = sign bit (bit31) = 1. 
+ load_program({0x1011, 0xF3EF, 0x8000}); + reset_cpu(); + start_cpu(); + set_reg(2, 0x80000000u); + step_cpu(); + step_cpu(); + EXPECT_EQ(reg(1), 0xFFFFFFFFu) << "ASR #32 sign-extends"; + EXPECT_NE(reg(0) & (1u << 29), 0u) << "ASR #32 C = sign bit"; +} + +TEST_F(CortexM3Test, LslByZeroLeavesCarryUnchanged) { + // LSLS R1,R2,#1 (0x0051) sets C=1; LSLS R3,R4 (0x0023, LSL #0 = MOV) + // must NOT touch C; then MRS. + load_program({0x0051, 0x0023, 0xF3EF, 0x8000}); + reset_cpu(); + start_cpu(); + set_reg(2, 0x80000000u); // first LSL forces C=1 + step_cpu(); // LSLS R1,R2,#1 → C=1 + step_cpu(); // LSLS R3,R4 (LSL #0) → C unchanged + step_cpu(); // MRS R0, APSR + EXPECT_NE(reg(0) & (1u << 29), 0u) << "LSL #0 must leave C unchanged"; +} + +TEST_F(CortexM3Test, LslRegisterSetsCarryFlag) { + // LSLS R1, R2 (register, 0x4091): R1 = R1 << R2; 0x80000000<<1 → 0, C=1. + load_program({0x4091, 0xF3EF, 0x8000}); + reset_cpu(); + start_cpu(); + set_reg(1, 0x80000000u); + set_reg(2, 1u); + step_cpu(); + step_cpu(); + EXPECT_NE(reg(0) & (1u << 29), 0u) << "register LSL must set C"; +} diff --git a/test/test_firmware_armcc.cpp b/test/test_firmware_armcc.cpp new file mode 100644 index 0000000..a6a2854 --- /dev/null +++ b/test/test_firmware_armcc.cpp @@ -0,0 +1,91 @@ +// E2E regression for the armcc/AC6 firmware corpus (T5c). +// +// These fixtures are PREBUILT STM32CubeF1 STM32F103RB-Nucleo examples +// compiled under Keil MDK with Arm Compiler 6 (armclang), committed as ELF +// (.axf) binaries under test/firmware/armcc/. CI has no Keil, so we do not +// rebuild them; the committed .axf are the gate. They exercise armcc codegen +// (different from the gcc hal_uart sample) and validate the simulator against +// real compiler output.
+// +// Regeneration recipe: see test/firmware/armcc/REGENERATE.md + +#include + +#include "arch/arm/cortex_m3/cortex_m3.hpp" +#include "chips/stm32f1/stm32f103_soc.hpp" + +#include +#include +#include + +using namespace micro_forge; +using namespace micro_forge::chips::stm32f1; + +namespace { + +std::vector read_file(const char* path) { + std::ifstream f(path, std::ios::binary); + if (!f) { + return {}; + } + return {std::istreambuf_iterator(f), {}}; +} + +// Boot a firmware to its steady loop; return "" on a clean run (no fault), +// otherwise a diagnostic string. Using a plain return value (not ASSERT_*) +// keeps the assertions inside the test body where gtest can short-circuit. +std::string boot_clean_or_diag(const char* path, size_t steps) { + auto data = read_file(path); + if (data.empty()) { + return std::string("firmware fixture missing: ") + path; + } + auto soc = Stm32f103Soc::create(); + if (!soc.has_value()) { + return "Stm32f103Soc::create() failed"; + } + auto r = (*soc)->load_elf(data); + if (!r.has_value()) { + return "ELF load failed"; + } + (*soc)->run(steps); + + auto state = (*soc)->machine().cpu->state(); + if (!state.has_value()) { + return "cpu->state() failed"; + } + if (*state == cpu::CPU::State::Faulted) { + char buf[64]; + auto cm3 = (*soc)->cortex_m3_cpu(); + auto pc = cm3->pc(); + std::snprintf(buf, sizeof(buf), "faulted at PC=0x%08lx", + static_cast(pc.has_value() ? *pc : 0xDEAD)); + return buf; + } + return {}; +} + +} // namespace + +// 2,000,000 steps mirrors the budget used to reach the main loop of the Keil +// F103 firmware (see document/notes/007). The gate is "boots clean, no fault". 
+ +TEST(FirmwareArmcc, TimTimeBaseBootsClean) { + EXPECT_EQ(boot_clean_or_diag( + ARMCC_FW_DIR "/nucleo_f103rb_tim_timebase.ac6.axf", 2'000'000), + "") + << "TIM_TimeBase (armcc/AC6) failed to boot clean"; +} + +TEST(FirmwareArmcc, UartPrintfBootsClean) { + EXPECT_EQ(boot_clean_or_diag( + ARMCC_FW_DIR "/nucleo_f103rb_uart_printf.ac6.axf", 2'000'000), + "") + << "UART_Printf (armcc/AC6) failed to boot clean"; +} + +TEST(FirmwareArmcc, GpioIoToggleBootsClean) { + EXPECT_EQ(boot_clean_or_diag( + ARMCC_FW_DIR "/nucleo_f103rb_gpio_iotoggle.ac6.axf", 2'000'000), + "") + << "GPIO_IOToggle (armcc/AC6) failed to boot clean"; +}