diff --git a/.agents/skills/code_style b/.agents/skills/code_style new file mode 120000 index 000000000..edb444fb7 --- /dev/null +++ b/.agents/skills/code_style @@ -0,0 +1 @@ +../../.github/skills/code_style \ No newline at end of file diff --git a/.agents/skills/execute b/.agents/skills/execute new file mode 120000 index 000000000..ff61f8ce2 --- /dev/null +++ b/.agents/skills/execute @@ -0,0 +1 @@ +../../.github/skills/execute \ No newline at end of file diff --git a/.agents/skills/ghidra b/.agents/skills/ghidra new file mode 120000 index 000000000..e348149ed --- /dev/null +++ b/.agents/skills/ghidra @@ -0,0 +1 @@ +../../.github/skills/ghidra \ No newline at end of file diff --git a/.agents/skills/implement b/.agents/skills/implement new file mode 120000 index 000000000..609ac0075 --- /dev/null +++ b/.agents/skills/implement @@ -0,0 +1 @@ +../../.github/skills/implement \ No newline at end of file diff --git a/.agents/skills/line_lookup b/.agents/skills/line_lookup new file mode 120000 index 000000000..98bcd185e --- /dev/null +++ b/.agents/skills/line_lookup @@ -0,0 +1 @@ +../../.github/skills/line_lookup \ No newline at end of file diff --git a/.agents/skills/lookup b/.agents/skills/lookup new file mode 120000 index 000000000..e0466a56a --- /dev/null +++ b/.agents/skills/lookup @@ -0,0 +1 @@ +../../.github/skills/lookup \ No newline at end of file diff --git a/.agents/skills/refiner b/.agents/skills/refiner new file mode 120000 index 000000000..d61e56ea1 --- /dev/null +++ b/.agents/skills/refiner @@ -0,0 +1 @@ +../../.github/skills/refiner \ No newline at end of file diff --git a/.agents/skills/scaffold b/.agents/skills/scaffold new file mode 120000 index 000000000..5684eddc2 --- /dev/null +++ b/.agents/skills/scaffold @@ -0,0 +1 @@ +../../.github/skills/scaffold \ No newline at end of file diff --git a/.claude b/.claude new file mode 120000 index 000000000..c0ca46856 --- /dev/null +++ b/.claude @@ -0,0 +1 @@ +.agents \ No newline at end of file diff --git 
a/.github/skills/code_style/SKILL.md b/.github/skills/code_style/SKILL.md index 5218fb40b..1121d173b 100644 --- a/.github/skills/code_style/SKILL.md +++ b/.github/skills/code_style/SKILL.md @@ -31,7 +31,7 @@ python tools/code_style.py audit --base origin/main - `audit` also checks touched `class` / `struct` declarations against known header declarations and, when no header exists, against the PS2 visibility rule. - `audit` warns on touched local forward declarations when the repo already has a header for that type. - `audit` warns on touched type members that look like invented padding or placeholder names such as `pad`, `unk`, or `field_1234`. -- `audit` also checks touched style-guide rules that clang-format cannot enforce for you, such as cast spacing, `using namespace`, `NULL`, and missing `EA_PRAGMA_ONCE_SUPPORTED` guard blocks when a header's guard region is touched. +- `audit` also checks touched style-guide rules that clang-format cannot enforce for you, such as cast spacing, `using namespace`, `NULL`, bare `#if MACRO` presence checks, recovered layout members that still use raw `unsigned char` / `unsigned short`, and missing or misordered `EA_PRAGMA_ONCE_SUPPORTED` guard blocks when a header's prologue is touched. - `audit` groups repeated findings by file so branch-wide output stays readable. - Use `audit --category safe-cpp` when you want a smaller Frontend/FEng-focused subset and `audit --category match-sensitive-cpp` when you want a conservative review queue for decomp code. - `format --check` is an opt-in wrapper around the repo's `.clang-format`, and by default it targets eligible changed C/C++ files, including match-sensitive code. @@ -95,7 +95,14 @@ Foo::Foo() - Use `nullptr` exclusively for null pointers. - Prefer `if (ptr)` / `if (!ptr)` over explicit null comparisons when the change is local and verified safe. 
- When a match-sensitive TU has many explicit `nullptr` checks and you decide to normalize them, prefer one mechanical full-TU pass over piecemeal cleanup. Rebuild the unit and re-check its status before keeping the rewrite. +- When a helper is doing address arithmetic, prefer `intptr_t` / `uintptr_t` or byte-pointer (`reinterpret_cast`) math over plain `int` parameters or integerized pointer subtraction. - Inline assembly is acceptable when it is needed to preserve dead-code compares, ordering, or other compiler behavior that source alone cannot reproduce. +- In low-level list / node / allocator code, prefer existing helper methods such as `AddBefore`, `AddAfter`, `Remove`, `GetPrev`, `GetNext`, or typed accessors over open-coding link rewiring once the helper exists. + +### Header prologues and preprocessor checks + +- In headers, keep the guard / `EA_PRAGMA_ONCE_SUPPORTED` block before any project `#include`; do not place includes ahead of `#pragma once`. +- Use `#ifdef MACRO` / `#ifndef MACRO` for presence checks. Reserve bare `#if MACRO` for cases where you really need the macro's numeric value. ### Forward declarations and local prototypes @@ -103,6 +110,7 @@ Foo::Foo() - If the repo already has a header declaration/definition for a type, include that header instead of redeclaring the type locally. - If the repo only has an empty or stub owner header, and line info / surrounding source clearly points at that header's subsystem, prefer populating that owner header over leaving a recovered project type declaration inside a `.cpp`. - Only keep a local forward declaration when no canonical repo header exists yet and you have verified that the ownership is still unresolved. +- Likewise for project free functions: if a declaration is shared across translation units, move it into the owning header instead of leaving ad-hoc local prototypes in `.cpp` files. 
- Prefer moving helper template declarations next to their real use site instead of leaving them in an unrelated file. ### Pointer style @@ -114,14 +122,22 @@ Foo::Foo() - Use the repo's header guard form when writing headers: `#ifndef` / `#define` plus the `#ifdef EA_PRAGMA_ONCE_SUPPORTED` / `#pragma once` block. - Keep member layout comments aligned and intact in decomp headers. +- In match-sensitive headers, do not add class/member placement-`new` or unsized `operator delete` overloads just because the implementation uses placement new or delete expressions. Prefer the platform/global overloads that the original headers already pulled in unless DWARF proves the type exposed a custom overload. +- If a delete-path DWARF diff keeps collapsing to an empty unsized `operator delete()` helper, check whether the touched type still has a stray unsized `operator delete(void *)` overload. In `zAttribSys`, removing the extra unsized `HashMap::operator delete(void *)` restored the original-sized delete path and made multiple matched functions exact. +- When writing a recovered layout, start from a pasted GC DWARF dump instead of hand-reconstructing a cleaner version. Treat the dump as source-of-truth data entry, then make only small verified fixes from PS2 or existing headers. - Preserve the original `class` / `struct` kind from existing headers or Dwarf / PS2 evidence; do not treat it as a cosmetic style choice. - Treat header declarations as the repo source of truth. If the repo only has local `.cpp` partial declarations, verify the kind with the PS2 dump instead of copying them blindly. - Even forward declarations and local partial declarations should use the accurate keyword when known. +- Keep the `// total size: 0x...` comment above the recovered type declaration instead of burying it inside the body. 
+- When a recovered type is a `class`, keep explicit access sections and put the method/accessor block before the member layout block unless existing repo evidence shows otherwise. - Preserve the member naming style that DWARF shows. Some types use `mMember`, others use `m_member`; do not normalize them. - Preserve recovered member names, types, order, and offset comments. Do not invent placeholder members named `pad`, `unk`, `unknown`, or `field_XXXX` for game code just to make a layout compile. +- Preserve the dumped declaration order too. Do not regroup methods, helpers, enums, or fields for readability unless an existing repo header or PS2 evidence proves the original order differs. - If a member is genuinely unknown, stop and verify it with `find-symbol.py`, GC Dwarf, and PS2 data. If the layout is still incomplete, add a short TODO above the type instead of burying uncertainty in fake member names. - Add offset / size comments when you are writing recovered type layouts from DWARF. +- In recovered layouts, prefer explicit-width aliases such as `uint8` / `uint16` when the field width is known. Use plain `char` for text / byte buffers and `signed char` when the field is a signed 8-bit counter. - Define inline member functions in headers only when DWARF shows that they are genuinely inlined in the binary. +- In touched shared inlines/templates, preserve recovered parameter names too. In `zAttribSys`, changing `HashMapTablePolicy::WrapIndex` from `k` back to DWARF's `index` cleared several matched-function DWARF mismatches without changing codegen. - Use `struct` for POD-like data carriers with public fields; use `class` for behavior-heavy types only when that matches the recovered type information. - Keep tiny placeholder methods as concise inline bodies when that is already the local pattern. 
@@ -134,13 +150,28 @@ Foo::Foo() ### Dense local code - Expand dense one-line helper structs, declaration blocks, and function bodies in non-match-sensitive files into normal multiline formatting. +- In low-level headers, prefer normal multi-line bodies for touched inline operators and accessors instead of stacking `{ return ...; }` on one line, unless the surrounding file clearly uses intentional placeholder one-liners. +- In match-sensitive `.cpp` files, do not slide a restored tiny out-of-line special member above the file's first real top-level function just for tidiness. On `zAttribSys`, moving `CollectionHashMap::~CollectionHashMap()` above `Class::Class` was enough to rename a `global constructors keyed to ...` helper and drop the TU until the original first-function order was restored. - Prefer readable blocks over stacked one-line statements when behavior does not depend on exact source shape. +- In touched validation/parsing code, prefer explicit min/max or boundary checks over equivalent magic-constant arithmetic when the clearer form still compiles to the verified result. +- In parser/state-table code, prefer named enums and enum-typed state variables over anonymous integer state codes when that rewrite is verified safe. + +### Recovery markers + +- Remove stale recovery markers such as `// TODO`, `// UNSOLVED`, or `// STRIPPED` when the touched code is now implemented or understood. +- If a marker still needs to stay, give it short context such as ownership uncertainty, a Dwarf caveat, a platform/config note, or a scratch/link reference. Avoid bare marker-only comments. +- Do not leave `// TODO` hanging off a declaration or helper you just implemented; either finish the thought or remove the marker. ### Uncertain ownership - If a declaration or global clearly compiles but its original home is uncertain, add a short TODO comment instead of inventing structure you cannot justify yet. 
- When ownership matters, verify it with `decomp-workflow.py`, `decomp-context.py`, and `line-lookup` before moving code. +### Readable helper extraction + +- When touched recovered code repeats the same pointer/boundary arithmetic, prefer a short named helper or accessor such as `GetTop`, `GetBot`, `GetNext`, `GetPrev`, `GetStringTableStart`, or `GetStringTableEnd` if that shape is already supported by Dwarf/inlining evidence. +- Prefer call sites that use those helpers or existing container APIs over re-encoding the same arithmetic or link manipulation inline. + ## Phase 3: Things Not To "Clean Up" Blindly - Do not move an inline method out of a header just because it looks cleaner. @@ -172,3 +203,10 @@ Keep the cleanup only if the build succeeds and the relevant match status is unc - The trailing `//` initializer-list markers are an intentional repo convention, not noise to remove. - Small `if (ptr)` cleanup batches can be kept in match-sensitive code, but only after rebuilding the affected unit. - Dense frontend shim files benefit from multiline struct/prototype/function formatting. +- Header prologues should keep the `EA_PRAGMA_ONCE_SUPPORTED` block ahead of includes, not after them. +- Bare `#if MACRO` presence checks are review bait; use `#ifdef` / `#ifndef` unless you are intentionally testing a numeric config value. +- Reviewed recovered headers tend to keep total-size comments above the type, methods before fields, explicit access sections, and fixed-width aliases for width-known narrow integer members. +- Recent `zMisc` review cleanup also showed that hand-reconstructed structs and reordered declarations create avoidable churn; copy recovered layouts from DWARF into the owner header first and keep the dumped order unless PS2/header evidence proves a correction. +- Reviewed fixups also remove stale bare recovery markers or replace them with context, and prefer existing list/node helpers over hand-written pointer/link rewiring. 
+- Some reviewed fixups improved readability without losing match by replacing opaque range-check arithmetic with explicit bounds and by moving repeated pointer/boundary math behind short named helpers. +- Other recurring review churn came from plain-`int` address helpers, stray local `.cpp` prototypes for shared functions, and integer-coded parser states where named enums were clearer but still matched. diff --git a/.github/skills/execute/SKILL.md b/.github/skills/execute/SKILL.md index 7abf9cb3f..ea6fef41b 100644 --- a/.github/skills/execute/SKILL.md +++ b/.github/skills/execute/SKILL.md @@ -11,6 +11,9 @@ the produced C++ compiles to byte-identical object code against the original ret For each function, "done" means both objdiff and normalized DWARF are exact. +Human review is not a substitute for running `dwarf compare`. Each function should hit +its own `verify` gate before you treat it as ready to hand off, commit, or move past. + ## Overview This workflow combines several smaller workflows: @@ -88,6 +91,10 @@ definition does not yet exist in the project, follow the scaffold workflow in `.github/skills/scaffold/SKILL.md` to create the needed header/source definitions before moving on. +Treat recovered types here as copied reference data, not as hand-designed headers. Copy +the GC DWARF type body into the canonical owner header first and preserve its declaration +order unless PS2 or existing repo-header evidence proves a specific correction. + ## Phase 3: Implement Functions ### 3a. Get the updated function list @@ -113,6 +120,9 @@ For each missing or nonmatching function, follow the implementation workflow in - **One at a time.** Keep the tree in a coherent state as you work through the list. - **Balance new vs fixing.** Don't get stuck on one stubborn function — sometimes implementing the next function reveals patterns that make the previous one click. 
+- **Recovered types are not freeform.** If a function forces you to add or fix a type, + copy the DWARF layout into the owner header first. Do not sketch structs/classes from + use sites or reorder declarations just to make the header look nicer. - **Mismatch triage:** - `@stringBase0` offset mismatches often resolve as more string literals are added - If you need to inspect the original string or rodata at a virtual address, use `python tools/elf_lookup.py 0xADDR` @@ -152,6 +162,10 @@ python tools/decomp-workflow.py verify -u main/Path/To/TU -f FunctionName If it fails, follow up with `decomp-workflow.py diff` and `decomp-workflow.py dwarf` until both checks pass. +Do not queue up several "probably done" functions and leave the DWARF check for later. +Close the `verify` gate per function before moving on whenever feasible; otherwise the +reviewer ends up doing avoidable DWARF triage. + ### 3g. Periodic reassessment After every few functions, re-run the full status check: @@ -189,6 +203,8 @@ For any remaining nonmatching functions, make one final pass using the implement or refiner workflow with all context accumulated during the session. Do not report a function as complete unless its per-function `verify` check also passes. +Do not hand a function to review as "done except maybe DWARF" — either resolve the DWARF +failure yourself or explicitly call out the blocker and why it remains. ## Phase 5: Report diff --git a/.github/skills/implement/SKILL.md b/.github/skills/implement/SKILL.md index 25f56b926..8915ac246 100644 --- a/.github/skills/implement/SKILL.md +++ b/.github/skills/implement/SKILL.md @@ -9,6 +9,11 @@ Your goal is to decompile a specific function: writing C++ source that compiles A function is not done until it is exact in both objdiff and normalized DWARF. +Reviewers should not be spending their time rediscovering DWARF mismatches. 
Before you +report progress, ask for review, hand the function off, or switch to another target, you +must run the per-function verification gate yourself and treat any DWARF failure as your +next task, not as review debt. + ## Phase 1: Gather Context Collect data from **all** of these sources in parallel where possible. @@ -85,6 +90,8 @@ Reference the skill for the usage. It gives info based on the virtual address of - If a repo header already exists for the type, include that header instead of introducing a local forward declaration. - Preserve the original `class` vs `struct` kind. If the existing header is missing or incomplete, verify the type kind from GC Dwarf and PS2 info before writing a local declaration. - Preserve real member names and field types too. Do not introduce `pad`, `unk`, or `field_XXXX` members as placeholders for guessed layout; verify the member list from GC Dwarf / PS2 data and leave a TODO when something is still uncertain. +- When a type is missing or incomplete, dump the full class/struct body from GC DWARF and paste that as the starting point. Do not reconstruct the layout from one function's field accesses or from guessed semantics. +- Preserve the dumped declaration order as well as the member order. Do not re-sort methods, group fields by guessed meaning, or otherwise "clean up" the layout unless an existing repo header or PS2 evidence proves a specific correction. ### 1e. Assembly reference @@ -125,6 +132,8 @@ and assembly: Utilize the dwarf information that you get from the lookup skill heavily. +For any recovered type you touch while implementing the function, treat the DWARF body as source material to copy, not prose to paraphrase. Start from the dumped layout in the canonical owner header, then make only the minimal verified fixes. + Don't add explanatory comments during implementation unless you need to document a remaining DWARF mismatch. Don't use any temporary local variables that don't exist in the dwarf. 
@@ -156,6 +165,16 @@ python tools/decomp-workflow.py verify -u main/Path/To/TU -f FunctionName If the build fails, fix compilation errors first. +As soon as you have a compiling draft, run the combined verification gate immediately: + +```sh +python tools/decomp-workflow.py verify -u main/Path/To/TU -f FunctionName +``` + +Do this before you spend a long time polishing late instruction mismatches. If `verify` +already shows a DWARF failure, fix that first so you are not polishing code the reviewer +will bounce anyway. + ### Check the diff ```sh @@ -203,6 +222,17 @@ debug-line owner files for each DWARF `// Range:` block, which makes it much eas spot inlines that are coming from the wrong header or owner file. Exact line-number agreement is a useful secondary hint, but file ownership is the first thing to check. +Use this as the default loop when the function compiles but `verify` is still failing: + +1. Run `verify`. +2. If DWARF fails, run `dwarf`. +3. Fix the structural issue the DWARF diff is pointing at first: missing/extra locals, + wrong qualifiers or parameter types, wrong inline ownership, wrong helper/header owner, + or a source shape that outlined something that should be inlined. +4. Rebuild and rerun `verify`. +5. Only return to instruction-by-instruction cleanup once the remaining failures are no + longer obvious DWARF-compare issues. + Manual fallback: After writing your code, you can also run the dwarf dump on the compiled output and then query your output dump with lookup.py to compare your decompiled functions against the originals. Since the address of the function you're working on can keep changing @@ -233,6 +263,9 @@ Every mismatched instruction is a signal — don't settle for "close enough". Reaching 100% instruction matching status is not enough. Stay in the loop until `verify` passes, which means the DWARF of the function also matches after normalization. 
+Do not leave a function in a "review-ready" or "good enough for now" state with a known +DWARF failure unless you are explicitly blocked and you document that blocker clearly. + ## Phase 5: Report Summarize: diff --git a/.github/skills/refiner/SKILL.md b/.github/skills/refiner/SKILL.md index a6aeb2125..0054e6e18 100644 --- a/.github/skills/refiner/SKILL.md +++ b/.github/skills/refiner/SKILL.md @@ -15,9 +15,25 @@ approaches that were tried before — instead, apply systematic lateral analysis - A diff is available (`decomp-diff.py -u -d `). - The "obvious" translation from Ghidra has been attempted. - You have been given the current source code and the diff. +- You have already run the per-function `verify` gate and know whether the remaining work + is still structural DWARF cleanup or true late-stage instruction cleanup. + +Refiner is not the place to dump unresolved DWARF debt on a reviewer. If `verify` or +`dwarf` is still showing obvious structural mismatches (missing locals, wrong types, +wrong inline ownership, wrong helper/header owner), fix those first or drop back to the +implementer workflow before doing late instruction polish. ## Phase 1: Read the full diff without collapsing +Before you start a refiner pass, confirm the gate status: + +```sh +python tools/decomp-workflow.py verify -u main/Path/To/TU -f FunctionName +``` + +If the combined gate is failing for reasons that are still clearly visible in the DWARF +diff, address those first instead of treating them as reviewer follow-up. + Preferred shortcut: ```sh @@ -151,6 +167,9 @@ DWARF mismatches to watch for: - Wrong return type - Missing inlined function records (means an inline call was outlined) +If these mismatches are still present, you are not in pure refiner territory yet. Resolve +them before you ask a reviewer to spend time on the function. 
+ ## Phase 4: Report Summarize: diff --git a/.github/skills/scaffold/SKILL.md b/.github/skills/scaffold/SKILL.md index a2f062f50..52cdf0e9a 100644 --- a/.github/skills/scaffold/SKILL.md +++ b/.github/skills/scaffold/SKILL.md @@ -29,7 +29,14 @@ Collect data from **all** of these sources in parallel where possible: ## Phase 2: Setup class -Copy and cleanup the header that you got from running the `lookup` skill using the `symbols/Dwarf` folder. Fix visibility, function order and vtable related things based on using `lookup` on the PS2 types. +Copy the header/type body that you got from running the `lookup` skill using the +`symbols/Dwarf` folder into the canonical owner header first. Do not retype or +reconstruct the layout from memory, from scattered callsites, or from guessed +semantics. + +Then do the minimum cleanup backed by evidence: fix visibility, function order and +vtable related things based on using `lookup` on the PS2 types, and clean up duplicated +inline copies when the DWARF emitted both versions. For formatting and local cleanup while writing the header, consult `.github/skills/code_style/SKILL.md`. Use it for member-comment alignment, declaration @@ -39,6 +46,9 @@ Preserve the real `class` / `struct` kind while scaffolding. Check existing head then use Dwarf plus PS2 visibility / vtable info to decide the type kind. Even temporary forward declarations should match the known original kind. +Keep the header prologue in repo order: header guard, `EA_PRAGMA_ONCE_SUPPORTED` block, +then includes. Do not drop project includes ahead of `#pragma once`. + If the repo already has a header for a type you need, include that header instead of adding a new local forward declaration. Only forward-declare when no canonical repo header exists yet and you have verified that the ownership is still unresolved. @@ -47,11 +57,31 @@ Preserve real member names, types, order, and offset comments while scaffolding. 
fill gaps with invented `pad`, `unk`, or `field_XXXX` members for game types; verify the layout from Dwarf / PS2 data and leave a TODO over the type if a field is still uncertain. +Preserve the declaration order from the dumped type body as well, not just the member +order. Do not regroup methods, fields, enums, or helper declarations for readability +unless an existing repo header or PS2 evidence proves the original owner header used a +different order. + +Keep the `// total size: 0x...` comment above the recovered type declaration. When the +recovered type is a `class`, keep explicit access sections and prefer putting methods / +accessors before the member layout block unless existing repo evidence says otherwise. + +When a recovered field width is known, prefer explicit-width aliases such as `uint8` / +`uint16` over raw `unsigned char` / `unsigned short`. Use plain `char` for string or byte +buffers and `signed char` when the field is a signed 8-bit counter. + +If a recovered type repeatedly walks neighbors, boundaries, or in-object offsets, prefer +small named helpers such as `GetTop`, `GetBot`, `GetNext`, `GetPrev`, or boundary getters +instead of repeating raw pointer arithmetic at each call site. + +When those helpers operate on addresses or byte offsets, prefer `intptr_t` / `uintptr_t` +or explicit byte-pointer arithmetic instead of plain `int` address parameters. + Only create headers if it's really necessary (the struct doesn't have inlines so you can't determine in which header file it goes and it's thematically very different from the other structs that use it), otherwise put it into the one you determined to be correct. The dwarf often has duplicated inlines, clean those up according to the order in the PS2 info. -Write a TODO comment over the struct/class if you aren't 100% sure that it belongs to the correct header. 
+Write a TODO comment over the struct/class if you aren't 100% sure that it belongs to the correct header, and say why (ownership uncertainty, circular dependency, dwarf caveat, etc.) instead of leaving a bare marker. ## Phase 3: Add needed files to jumbo file and compile diff --git a/.gitignore b/.gitignore index 547d049c6..1e78f634d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,12 @@ .idea/ .vs/ +# macOS +.DS_Store +.AppleDouble +.LSOverride +._* + # Caches __pycache__ .mypy_cache diff --git a/AGENTS.md b/AGENTS.md index d367fc237..798e39bec 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -12,8 +12,16 @@ ninja all_source # build all objects ninja # build all objects, hash check and progress report ninja baseline # generates baseline report for regression checking ninja changes # check for regressions after code changes (empty = no regressions) +python tools/build_matrix.py # sequential full `ninja` across GC/Xbox/PS2, then restore GOWE69 +python tools/build_matrix.py --all-source # sequential compile-only smoke check across GC/Xbox/PS2 ``` +Use `python tools/build_matrix.py` when you want one command that verifies the current +worktree across all supported platforms. It runs `configure.py --version ...` and the +selected ninja target sequentially, writes per-platform logs under `build/<version>/logs/`, +prints failure tails with the exact failing command, and restores the worktree to +`GOWE69` by default when it finishes.
+ ## Project Layout ``` @@ -31,16 +39,7 @@ objdiff.json Generated build/diff configuration ## Sub-Agent Usage -Sub-agents are allowed only for **read-only exploration** tasks such as: - -- searching the codebase for symbols, call sites, or include relationships -- inspecting decomp output, assembly, DWARF, PS2 dumps, or line mappings -- gathering context from Ghidra, `tools/decomp-workflow.py`, `lookup.py`, `decomp-diff.py`, or similar tools -- summarizing findings that help the main worker decide what to change - -Sub-agents must **not** write or edit code files, headers, configs, or other repository files. -All persistent file changes, decomp implementations, scaffolding, and follow-up fixes must be -done by the main worker after reviewing the read-only findings. +Sub-agents are **strictly prohibited**. Do not use sub-agents for any tasks (whether read-only exploration or editing). All work must be performed by the main worker directly. ## Forbidden Changes @@ -259,6 +258,94 @@ It also compares the debug-line ownership of each `// Range:` block. Treat the strong evidence that an inline body came from the wrong header or owner file. The exact file+line count is stricter and mainly useful as a secondary hint, not as the main gate. 
+### dwarf1_subroutine_tree.py — Raw DWARF subroutine fallback + +When `dtk dwarf dump` prints `// ERROR: Failed to process tag ...` and +`dwarf-compare.py` cannot find the rebuilt wrapper function, inspect the raw +relocated `.debug` tree directly with: + +```sh +python tools/dwarf1_subroutine_tree.py -u main/Path/To/TU -f FunctionName +python tools/dwarf1_subroutine_tree.py build/GOWE69/src/Path/To/TU.o --tag 0xTAG +python tools/dwarf1_subroutine_tree.py -u main/Path/To/TU -f FunctionName --json +python tools/dwarf1_subroutine_tree.py -u main/Path/To/TU -f FunctionName --compare-original +python tools/dwarf1_subroutine_tree.py -u main/Path/To/TU -f FunctionName --show-non-subroutine +``` + +This prints the nested inline-subroutine / lexical-block ownership tree straight +from the raw MWCC DWARF tags, so it is the fastest way to answer: + +- does the rebuilt top-level wrapper DIE still exist at all? +- which inline owner changed (`VecHashMap<...>::Remove` vs `CollectionHashMap::Remove`, etc.)? +- did a wrapper disappear only because the dumper/parser skipped it, or because the + source really changed the raw DWARF tree? +- with `--compare-original`, which owner/name rows are actually inserted, replaced, or + missing relative to the original `symbols/Dwarf/functions.nothpp` tree? + +Treat `--compare-original` primarily as an owner/name drift check. By default it compares +owner + bare function names, so overload-only mismatches can still hide behind a clean +depth/label pass. When a candidate adds an overloaded inline helper or reuses the same +base name with a different signature, run `--show-non-subroutine` too and inspect the +rebuilt parameter/local rows directly before trusting the raw-tree result. + +### prodg_dump.py — ProDG compiler-state dump helper + +When you need the exact ProDG compiler state for one unit, prefer this helper over +reconstructing long `ngccc` / `cc1plus` command lines by hand. 
It recovers the real +`ngccc.exe` invocation from `ninja -t commands`, derives the matching preprocess step, +passes the real optimization / target / warning flags through to `cc1plus.exe` +(`-fdump-unnumbered` included), and can extract, summarize, or diff one function across +dump sets. + +```sh +python tools/prodg_dump.py command -u main/Speed/Indep/SourceLists/zAttribSys +python tools/prodg_dump.py dump -u main/Speed/Indep/SourceLists/zAttribSys -o /tmp/zattrib_base +python tools/prodg_dump.py extract /tmp/zattrib_base --stage lreg \ + -f 'VecHashMap::UpdateSearchLength' +python tools/prodg_dump.py summary /tmp/zattrib_base --stage rtl \ + -f 'VecHashMap::UpdateSearchLength' +python tools/prodg_dump.py trace /tmp/zattrib_base --stage greg \ + -f 'void Attrib::Database::RemoveClass(const Attrib::Class *)' --pseudo 318,319 +python tools/prodg_dump.py diff /tmp/zattrib_dumps /tmp/zattrib_dumps \ + --left-base-name base --right-base-name preinc --stages lreg,greg,rtl \ + -f 'VecHashMap::UpdateSearchLength' +python tools/prodg_dump.py diff /tmp/zattrib_oldfloor_dump /tmp/zattrib_block_dump \ + --exact --summary-only --stages rtl,greg,lreg \ + -f 'unsigned int VecHashMap::UpdateSearchLength(unsigned int, unsigned int)' +python tools/prodg_dump.py diff /tmp/zattrib_block_dump /tmp/zattrib_trial_dump \ + --exact --stages s \ + -f 'UpdateSearchLength__t10VecHashMap5ZUiZQ26Attrib5ClassZQ36Attrib5Class11TablePolicyb0Ui16UiUi' +build/tools/dtk elf disasm build/GOWE69/src/Speed/Indep/SourceLists/zAttribSys.o /tmp/zattrib_objdisasm.txt +python tools/prodg_dump.py diff /tmp/zattrib_oldfloor_objdisasm.txt /tmp/zattrib_objdisasm.txt \ + -f 'RemoveCollection__Q26Attrib5ClassPQ26Attrib10Collection' +``` + +Use `extract --grep ... -C <N>` when you only want a few interesting lines inside a +function block, such as stack-slot references or one pseudo register family.
`summary` +prints one function's user pseudos, hard-register refs, frame-slot traffic, and compare +signatures for a given stage. `trace` is the quickest way to follow a few pseudo-register +families through a post-allocation dump: it prints each requested pseudo's current home, +conflict/preference summary, and the matching dump entries, including entries where the +pseudo only survives via its assigned hard register. Use it in `greg`/`lreg` when you are +trying to answer "what kept pseudo 318 on r6 instead of r7?" without hand-grepping a full +function dump. `diff --summary-only` is the quickest way to compare two variants +structurally without drowning in full unified diffs; it highlights changed frame-slot +counts and compare operand/order signatures, while plain `diff` still prints the raw +stage diff underneath. `diff --skip-missing` is useful when one side is a partial saved +dump set that only contains some stages or functions. For `lreg`, `diff` also summarizes +register-preference and final hard-register assignment changes so allocator/regclass +shifts stand out immediately. The helper now also understands final assembly (`--stages s`) +by extracting blocks from `.type/.size` symbol ranges; use that when ProDG's late text +dumps (`regmove` / `sched` / `sched2`) are empty in this setup. Assembly-stage queries are +by mangled symbol name, not the demangled `;; Function ...` header used by RTL-style +dumps. Plain `.s` diffs are normalized automatically now: `diff` strips `.line` / +`.debug_srcinfo` scaffolding and renumbers local `.L*` labels so you see emitted-code +movement instead of debug-section churn. It also understands `dtk elf disasm` text +directly: when you pass a disassembly text file with `.fn ...` / `.endfn ...` blocks, +`extract` and `diff` can operate on real object-level symbols without another ad hoc +extractor, and `diff` also normalizes object-local `.L_*` label tokens so +rebuilt-vs-reference wrapper diffs are easier to read. 
+ When working with these tools, do not just work around recurring friction silently. If you notice a clear, safe workflow or tooling improvement that would make future decomp work faster, shorter, or more reliable, prefer implementing that improvement as part of the task @@ -343,8 +430,15 @@ This is a **C++98** codebase compiled with ProDG GC 3.9.3 (GCC 2.95 under the ho - Inline assembly is acceptable when needed to reproduce dead code or compiler scheduling that source alone cannot express cleanly - Preserve the original `class` vs `struct` kind. Check existing headers first, then Dwarf / PS2 info when needed. Even forward declarations and local partial declarations should use the accurate keyword when known. - Prefer including the real repo header over introducing a local forward declaration for a project type. If a type already has a header in `src/`, include it instead of redeclaring it locally. -- If a subsystem already has a stub owner header and the debug line info points back at that subsystem, fill the owner header instead of keeping a recovered project type declaration in a `.cpp`. +- If a subsystem already has a stub or umbrella owner header and the debug line info points back at that subsystem, fill the owner header instead of keeping a recovered project type declaration in a `.cpp` or spinning up a one-off micro-header just for that type. +- Apply the same owner-header rule to shared enums, globals, callback typedefs, and free functions. If multiple TUs need the declaration, put it in the canonical owner header once and include that header instead of duplicating enum bodies or `extern` blocks across `.cpp`s. - Preserve original member names, types, order, and proven layout comments. Do not invent `pad`, `unk`, or `field_XXXX` members just to satisfy a guessed size or offset; verify the real members with `find-symbol.py`, GC Dwarf, and PS2 data, and leave a short TODO if a layout detail is still uncertain. 
+- When recovering a type, start by copying the GC DWARF struct/class body into the canonical owner header. Treat that dump as the source of truth for declaration order too; only apply targeted fixes that are backed by existing repo headers or PS2 data, such as visibility, virtual/function order, duplicate-inline cleanup, or owner-header placement. +- Do not hand-reconstruct recovered layouts from scattered field accesses, guessed semantics, or a "cleaned up" reordering. If you do not have enough evidence to paste the type confidently, stop and gather more DWARF / PS2 info first. +- Preserve the original scope and nesting of recovered declarations too. Keep class-owned enums/types nested when the original did, and move subsystem/global enums into their real owner header instead of flattening or duplicating them near one caller. +- Use the narrowest correct home for recovered declarations: shared project-facing types in headers, TU-private helper structs/classes and allocator metadata in the `.cpp`. Do not dump implementation-only helpers into public headers just because they were convenient to write there. +- Prefer real subsystem or vendor headers over ad-hoc local typedef/prototype blocks. If an external API is shared and the project is missing the proper header, add that header in the correct subtree instead of stashing declarations in an unrelated gameplay file. +- Do not leave repeated `// TODO move`, `// TODO where should this go`, or "I just made this up" markers around declarations. Either move the declaration to its owner now or leave one short targeted TODO above the owner declaration if ownership is still genuinely unresolved. - Follow DWARF member naming exactly (`mMember` vs `m_member`) instead of normalizing names - Omit the `this` pointer. - Use `nullptr` and `override`. If they are missing, you need to include `types.h`. @@ -363,44 +457,26 @@ python tools/decomp-status.py --unit main/Path/To/TU Commit whenever the match percentage increases (e.g. 
you matched a new function). Use this format for the commit message: ``` -n.n%: short description of what was matched or changed +n.n[n]%: short description of what was matched or changed ``` Examples: - `42.1%: match UpdateCamera` -- `78.5%: match PlayerController constructor and destructor` +- `78.56%: match PlayerController constructor and destructor` - `100.0%: full match for zAnim` Do not batch up multiple percentage milestones into one commit — commit as each improvement lands. -## Parallel Sub-Agent Matching - -When working on a translation unit with multiple non-matching functions, use sub-agents selectively for **read-only exploration** around individual functions. Each sub-agent should focus on **exactly one function** — do not assign a sub-agent more than one function at a time. - -**Limit: never run more than 5 sub-agents concurrently.** Spawning too many at once causes resource contention and makes it harder to reason about progress. - -Guidelines: - -- Prefer solving difficult matching work in the main worker. Use sub-agents to inspect one function's context, diff, DWARF, or related call paths without editing files. -- Spawn a sub-agent per function only when the functions are independent (no shared edits to the same source lines). -- Sub-agents stay read-only. Let them inspect existing diff/context output rather than compiling or rebuilding. -- Do not sit idle waiting for sub-agents to finish. Continue with other independent investigation while they run. -- After a useful result lands and you make a real improvement, check the updated match percentage and commit if it improved. - ## Matching Philosophy You should take the Ghidra decompiler output for the initial translation step, get it to compile, make sure that the dwarf of the function matches and only then look for binary matching problems in the assembly. 
Be aware Ghidra usually gets the order of branches incorrect in if statements (it inverts the logic and the two bodies are swapped), this needs to be fixed to achieve bytematching status. -You may use sub-agents to gather read-only context during this process, but they must not -edit files. Treat their output as analysis input for the main worker, not as a path to -delegate source changes. - A function is only done when both objdiff and normalized DWARF are exact. Treat a 100% instruction match with a DWARF mismatch as unfinished work, not a near-complete result. -The dwarf of your structs doesn't have to neccessarily match the original due to various reasons, just make sure that you copied everything correctly. +The DWARF of your structs does not always compare cleanly in every detail, but the recovery process still starts by copying the dumped layout correctly. Do not freehand-reconstruct a struct from call sites or guessed semantics; paste the DWARF body into the real owner header first, then make only the minimal PS2/header-backed fixes such as visibility, function order, vtable order, or duplicate-inline cleanup. Never dismiss a diff as "close enough" or "just register allocation." Every mismatched instruction is a signal that the source doesn't perfectly represent the original. Even @@ -436,6 +512,8 @@ Virtual table layout is also missing from the dwarf but there on PS2. Be aware t The inline information in the dwarf is incredibly useful. When you encounter one, you should look up its body in the project. If it doesn't exist yet, deduce how the code should look like and add it to the correct header (you can use your address lookup skill or if that doesn't succeed and the inline is a member function, just find the corresponding class in the project). +For recovered structs and classes, treat DWARF as copied source material rather than a loose sketch. 
Paste the dumped type into the owner header first and keep its declaration/member order unless PS2 or an existing repo header proves a specific correction. + It's very important that you use math inlines from bMath and UMath as shown in the dwarf. UVector inlines use temporaries that the compiler couldn't optimize out. You can see in the dwarf on which stack address they are and deduce final destination they are copied to. ### Store instruction order hints @@ -481,6 +559,20 @@ register assignments but does NOT affect integer register assignments (and vice Every local that is NOT in the DWARF is a spurious temporary — remove it. - Every local that IS in the DWARF must exist in the source, even if you don't use the name. Name it exactly as the DWARF shows. +- When objdiff is already exact but a local only differs by lexical scope, try an equivalent + loop form that keeps the temporary inside the same block as the original DWARF. In practice, + changing a `for (...; ...; x = next)` into a `while (...) { T *next = ...; ...; x = next; }` + can fix DWARF-only scope mismatches without changing codegen. + +### Slot-pooled delete paths + +- If a recovered local/project type participates in `delete` paths or container/list teardown, + check whether the original type exposed inline `operator new` / `operator delete`. Missing + slot-pool-backed operators often makes GCC emit `__builtin_delete` instead of the original + allocator/free path and can also move destructor/delete DWARF ownership out of the TU. +- This applies even when the TU mostly allocates the type manually through `bOMalloc` or a + pool helper. Restoring the inline operators can still be necessary so `delete` expressions + and synthesized cleanup paths match the original code and DWARF. 
### Virtual vs direct calls @@ -503,6 +595,19 @@ register assignments but does NOT affect integer register assignments (and vice - If an inline appears in the DWARF but does not exist in `src/`, deduce its body and add it to the correct header (use `line-lookup` skill to find the header file). +### Concrete template specializations + +- Do not assume ProDG rejects explicit member-function specializations of concrete template + instantiations. Forms like + `template <> inline ReturnType VecHashMap<...>::RemoveIndex(...)` + do compile here. +- When you use that lane, expand dependent names inside the specialized body: + replace `Policy::` with the concrete owner, replace non-type template params like `Unk2` + with literal `true` / `false`, and replace aliases like `T *` / `KeyType` with the + concrete instantiation types. +- Prefer this over owner-specific derived helper classes when you need per-instantiation + source bodies but want to preserve the original `VecHashMap<...>` owner names in DWARF. + --- ## Discovered Matching Patterns @@ -525,7 +630,77 @@ TU: | Function: TU: zAttribSys | Function: \_STL::\_Rb_tree::\_M_insert If an STL node insertion path refuses to match, check whether the element type is missing explicit inline special members that the original source exposed. Adding the Dwarf-backed `operator new`, `operator delete`, placement `new`, copy constructor, and tiny accessors to `TypeDesc` made the tree node creation/insertion path match exactly. +### WrapperOwnedVecHashMapHelpers + +TU: zAttribSys | Function: Class::SetTableBuffer / Class::AddCollection / Database::AddClass +For `VecHashMap`-backed wrapper tables, keep `VecHashMap::Clear()` as a named helper, keep the `CollectionHashMap` constructor and scan helpers on the thin wrapper in `AttribPrivate.h`, and rely on the global placement `operator new` from SN's `<new>` instead of adding a `Node::operator new` member.
That combination restored `Class::SetTableBuffer`, `Class::AddCollection`, and `Database::AddClass` to PASS/PASS without changing objdiff. + +### FirstFunctionAnchorsKeyedConstructors + +TU: zAttribSys | Function: global constructors keyed to Attrib::Class::Class / ClassPrivate::CollectionHashMap::~CollectionHashMap +In a jumbo TU, do not move a newly restored out-of-line wrapper special member above the file's first real top-level function without rechecking symbol order. In `AttribClass.cpp`, restoring `CollectionHashMap::~CollectionHashMap()` was necessary to emit the destructor and keep `Class::Delete` matched, but placing it above `Class::Class` renamed the 44-byte `global constructors keyed to ...` helper. Keeping `Class::Class` first and moving the wrapper dtor below it restored the helper name and the TU's `99.91143%` floor. + +### SharedInlineParameterNamesMatter + +TU: zAttribSys | Function: Collection::Contains / Collection::NextKey / Collection::GetNode / HashMap::UpdateSearchLength +When a byte-exact mismatch fans out through the same shared inline helper, verify the helper's recovered parameter names before restructuring callers. Renaming `HashMapTablePolicy::WrapIndex`'s first parameter from `k` back to DWARF's `index` cleared four matched-function DWARF mismatches at once without changing objdiff. + +### TernaryNodeGetClearsWrapperDWARFNoise + +TU: zAttribSys | Function: Class::RemoveCollection / VecHashMap::RemoveIndex +When `VecHashMap::Node::Get()` is written as `return IsValid() ? mPtr : nullptr;` instead of an `if (IsValid()) return mPtr; return nullptr;` ladder, `Class::RemoveCollection` drops the stray `RemoveIndex::result // r26` DWARF mismatch while leaving objdiff and `Database::RemoveClass` unchanged. This is a safe retained cleanup, but it does not touch the remaining `Database::RemoveClass` first-inline `newMaxSearch r31 -> r30` DWARF debt. 
+ +### MaxSearchGreaterFixesRemoveCollectionDwarf + +TU: zAttribSys | Function: Class::RemoveCollection / VecHashMap::UpdateSearchLength +Writing the second `UpdateSearchLength` search loop as `for (unsigned int searchLen = 1; maxSearch > searchLen; searchLen++)` instead of `searchLen < maxSearch` was the first real retained partial win. On `zAttribSys` it made `Class::RemoveCollection` DWARF-exact, improved that wrapper's objdiff slightly, and nudged the unit text floor from `99.91143%` to `99.9116%`, but it did not help `Database::RemoveClass`. + +### SearchFirstHoistIsFalseFriend + +TU: zAttribSys | Function: Database::RemoveClass / VecHashMap::UpdateSearchLength +Hoisting the second-loop `searchLen` declaration above `newMaxSearch` (`unsigned int searchLen = 1; unsigned int newMaxSearch = 0; for (; maxSearch > searchLen; searchLen++)`) is a DWARF-only false friend. On real forced rebuilds, direct `dtk elf disasm` diffs show that both wrapper symbols are text-identical to the retained block-scoped floor, so it does **not** buy real code progress. What it does change is DWARF: it hoists `searchLen` out of the anonymous loop block in both inlined `UpdateSearchLength` bodies and drops normalized DWARF to about `86.6%`, so do not keep or build on that variant. + +### BlockScopedSearchLenRaisesTextButSharesR6Debt + +TU: zAttribSys | Function: Class::RemoveCollection / Database::RemoveClass / VecHashMap::UpdateSearchLength +The block-scoped variant `unsigned int newMaxSearch; { unsigned int searchLen = 1; newMaxSearch = 0; for (; maxSearch > searchLen; searchLen++) ... }` is the current best-known text floor. On `zAttribSys` it fixes `Database::RemoveClass`'s **second-inline** `newMaxSearch // r31` placement, raises both remaining wrappers to `98.6%`, and lifts the TU to `99.91307%` (`42B` remaining), but it also collapses the remaining debt in **both** wrappers to the same single mismatch: the shared second-inline `searchLen // r7 -> r6` register-home swap. 
+On that `99.91307%` plateau, a whole batch of "safe" follow-up nudges stayed completely inert: empty `if (Unk2)` / `if (!Unk2)` hooks inside the late loop, `searchLen = searchLen + 0`, `newMaxSearch = newMaxSearch + 0`, `register` hints on `searchLen` / `newMaxSearch`, typed-pointer `IsValid()`, `Move`'s C-style cast, `Key()`'s zero spellings, local `asm("r7")` register variables, and even an empty `asm volatile("" : "+r"(searchLen))` constraint. If you revisit this plateau, skip those low-risk hooks and look for a genuinely different source shape. + +### SplitSearchLenInitIsAsmIdenticalNoise + +TU: zAttribSys | Function: Class::RemoveCollection / Database::RemoveClass / VecHashMap::UpdateSearchLength +Splitting the retained block-scoped `searchLen` initializer into declaration plus assignment (`unsigned int searchLen; searchLen = 1;`) is a false signal. It can make `verify` and the per-function `decomp-status` estimates wobble slightly, but `python tools/prodg_dump.py diff --stages s` shows the emitted assembly for the shared `UpdateSearchLength` template and for both wrapper symbols is identical to the retained block floor, so do not treat that variant as real progress. + +### PreincrementLessThanFixesHeaderButMovesGlobalRegs + +TU: zAttribSys | Function: Class::RemoveCollection / Database::RemoveClass / VecHashMap::UpdateSearchLength +Writing the late scan as `for (unsigned int searchLen = 0; ++searchLen < maxSearch; )` is the first condition-only rewrite that makes the second `UpdateSearchLength` loop header itself line up: it fixes the `cmplwi r4, 1` fold, restores the `li r7, 1` / `cmplw r7, r4` shape, and also fixes `Database::RemoveClass`'s `newMaxSearch // r31` placement in that late inline. But it is still a false friend overall: it moves both wrappers onto a broader `r6`/`r7` swap family, drops `Class::RemoveCollection` to `98.6% / 99.6%`, leaves `Database::RemoveClass` at `98.5% / 99.6%`, and changes the remaining mismatches rather than removing them. 
+ +### ObjectDisasmResolvesWrapperDrift + +TU: zAttribSys | Function: Class::RemoveCollection / Database::RemoveClass +When wrapper `verify` / `objdiff` movement disagrees with `.s`-level or dump-summary comparisons, disassemble the rebuilt object with `build/tools/dtk elf disasm` and diff the `.fn/.endfn` blocks directly with `python tools/prodg_dump.py diff`. On `zAttribSys`, that direct object diff corrected a stale assumption: the commonly revisited `search-first` late-loop variant is object-identical to the retained block-scoped floor in both wrapper symbols and only changes DWARF. Use the object-level symbol diff when you need the truth about wrapper-emitted code, not just helper `.s` output or status churn. + +### Unk2SelectiveLoopBodyHooksAreRealButUnsafe + +TU: zAttribSys | Function: Class::RemoveCollection / Database::RemoveClass / VecHashMap::RemoveIndex +The second empty `for` body in `VecHashMap::RemoveIndex` is a real per-instantiation regalloc lever via the `Unk2` template boolean, but the obvious source shapes are false friends. In this endgame, baseline `if (Unk2) {}` inside that empty body perturbs only `Database::RemoveClass` while leaving `Class::RemoveCollection` at the retained baseline, and baseline `if (!Unk2) {}` perturbs only `Class::RemoveCollection` while leaving `Database::RemoveClass` at the retained baseline. On top of the block-scoped `UpdateSearchLength` near-miss, `if (!Unk2) {}` still perturbs only `Class::RemoveCollection` while leaving `Database::RemoveClass` at the block state. That proves the `Unk2` polarity and the body-CFG selectivity are real, but the tested body forms (`{}`, expression statements, `switch (0)`, `continue`) all regressed, so do not assume an `Unk2`-guarded loop-body tweak is a safe hybrid fix by itself. By contrast, wrapping an empty body as `if (Unk2) { do {} while (0); }` or `if (!Unk2) { do {} while (0); }` was fully inert on the retained floor and is not a useful perturbation. 
+ +### HeaderOnlyZAttribSweepsNeedForcedRebuild + +TU: zAttribSys | Function: any header-defined inline in `VecHashMap64.h` +Header-only sweeps against `VecHashMap64.h` can silently compare stale code unless you force a rebuild of the rebuilt jumbo object first. The generated `build.ninja` rule for `build/GOWE69/src/Speed/Indep/SourceLists/zAttribSys.o` tracks only `src/Speed/Indep/SourceLists/zAttribSys.cpp`, so after editing included headers you must delete that rebuilt `.o` (and, if needed for tooling, refresh the `.ctx`) before trusting `verify` or `diff` output. + +### SizedDeletePathBeatsStrayUnsizedDelete + +TU: zAttribSys | Function: Collection::~Collection / Class::Delete / HashMap::PreFlightAdd +If delete-path DWARF keeps collapsing to an empty unsized `operator delete()` helper, re-check whether the type still has a stray unsized `operator delete(void *)` overload that the original source never exposed. In `AttribHashMap.h`, removing the unsized `HashMap::operator delete(void *)` let ProDG reuse the sized delete path again and made `Collection::~Collection` and `Class::Delete` DWARF-exact without moving objdiff. + ### RegisterAllocatorTieBreakDeadEnd TU: zAttribSys | Function: Class::RemoveCollection / Database::RemoveClass -If two near-matching functions differ only because the same inlined helper chain lands `mTableSize` in `r6` in the original but `r7` in the rebuild, treat it as a likely ProDG/GCC 2.95 register-allocation tie-break, not a normal source mismatch. In `zAttribSys`, `VecHashMap::FindIndex` inlined through `Remove -> RemoveIndex -> UpdateSearchLength` produced a stable `lwz r6, 4(r3)` vs `lwz r7, 4(r3)` split, which then propagated into later `UpdateSearchLength` control-flow differences. 
This survived 300+ source experiments: loop-form changes, adding/removing temporaries, splitting/merging expressions, helper inline/outline changes, declaration-order tweaks, member type changes, access-control changes, template method reorderings, and inline vs out-of-line ctor/dtor placement. Once the diff has collapsed to this kind of isolated register swap and DWARF locals/inlining already match, stop attacking each caller separately. Document the functions as `NON_MATCHING`, note the shared inlined root cause, and only consider flag permutation or compiler-level investigation as a last resort. +If two near-matching functions differ only because the same inlined helper chain lands long-lived locals in different caller-saved registers, treat it as a likely ProDG/GCC 2.95 register-allocation tie-break, not a normal source mismatch. In `zAttribSys`, the earlier retained `maxSearch > searchLen` rewrite moved the live endgame debt into the **second** `UpdateSearchLength` inline reached from `RemoveIndex`'s empty follow-up loop, and the current retained block-scoped floor collapses the remaining debt even further: **both** `Class::RemoveCollection` and `Database::RemoveClass` now fail on the same shared `searchLen // r7 -> r6` swap in that late inline. This survived 300+ source experiments plus later focused sweeps of `RemoveIndex` statement order, `FindIndex` local declaration order, wrapper-result liveness, first-call shapes, second-loop declaration/scope variants, second-call expression sugar, `UpdateSearchLength` prelude condition forms, `register` hints on params/locals, per-instantiation `UpdateSearchLength` specializations with identical bodies, and combination searches across individually neutral source toggles. 
Temporary compiler-side probing was no better: single-flag ProDG toggles around `gcse`/scheduling/CSE/regmove/caller-saves/thread-jumps/delayed-branch either regressed or produced no change, local register variables did not reserve hard registers, empty asm constraints on the loop-carried local were inert, and explicit `UpdateSearchLength` specialization was catastrophic even with an identical body. Once the remaining diff has collapsed to this kind of isolated shared register-home swap, stop attacking each caller separately. Document the wrappers as `NON_MATCHING`, note the shared inlined root cause, and only revisit the area if you have a genuinely new source shape or stronger compiler evidence than the dead ends above. + +### NamedRodataForInlinedAllocatorStrings +TU: zAttribSys | Function: DatabaseExportPolicy::Initialize +When an inlined allocator path must reference a specific rodata symbol, replace a repeated string literal with a named `static const char[]` so the compiler preserves the expected rodata label and relocation pattern. 
diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/AttribSys.h b/src/Speed/Indep/Tools/AttribSys/Runtime/AttribSys.h index f458308a4..8d18e02ea 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/AttribSys.h +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/AttribSys.h @@ -100,8 +100,22 @@ class TypeDesc { static ITypeHandler *Lookup(Type t); static Type NameToType(const char *name); + void *operator new(std::size_t bytes) { + return Alloc(bytes, "Attrib::TypeDesc"); + } + + void operator delete(void *ptr, std::size_t bytes) { + Free(ptr, bytes, "Attrib::TypeDesc"); + } + + void *operator new(std::size_t, void *ptr) { + return ptr; + } + TypeDesc() : mType(0), mName(""), mSize(0), mIndex(0), mHandler(nullptr) {} + TypeDesc(const TypeDesc &src) : mType(src.mType), mName(src.mName), mSize(src.mSize), mIndex(src.mIndex), mHandler(src.mHandler) {} + TypeDesc(unsigned int t) : mType(t), mName(nullptr), mSize(0), mIndex(0), mHandler(Lookup(t)) {} TypeDesc(const char *name, std::size_t size, std::size_t index) @@ -111,6 +125,10 @@ class TypeDesc { return mType; } + const char *GetName() const { + return mName; + } + unsigned int GetSize() const { return mSize; } @@ -139,7 +157,14 @@ class TypeDesc { class TypeDescPtrVec : public std::vector {}; // total size: 0x10 -class TypeTable : public std::set {}; +class TypeTable : public std::set { + public: + ~TypeTable(); + + void operator delete(void *ptr, std::size_t bytes) { + Free(ptr, bytes, "Attrib::TypeTable"); + } +}; // total size: 0x8 class CollectionList : public std::list {}; @@ -163,17 +188,11 @@ class Database { const TypeDesc &GetTypeDesc(Type t) const; void DumpContents(Key classFilter) const; - static Database &Get() { - return *sThis; - } + static Database &Get(); - void operator delete(void *ptr, std::size_t bytes) { - Free(ptr, bytes, "Attrib::Database"); - } + void operator delete(void *ptr, unsigned int bytes); - bool IsInitialized() { - return sThis != nullptr; - } + static bool IsInitialized(); friend class 
DatabasePrivate; friend class DatabaseExportPolicy; @@ -187,6 +206,18 @@ class Database { DatabasePrivate &mPrivates; // offset 0x0, size 0x4 }; +inline Database &Database::Get() { + return *sThis; +} + +inline void Database::operator delete(void *ptr, unsigned int bytes) { + Free(ptr, bytes, "Attrib::Database"); +} + +inline bool Database::IsInitialized() { + return sThis != nullptr; +} + class Array { #define Flag_AlignedAt16 (1 << 15) private: // Returns the base location of this array's data @@ -383,10 +414,43 @@ class Node { Flag_IsLocatable = 1 << 6, }; + // DWARF order: GetFlag, SetFlag, operator new, operator delete, operator new(placement), + // operator delete(), Node(), Node(key,...), operator=, operator==/!=/< (Node), + // operator==/!=/< (uint), RequiresRelease..IsLocatable, Invalidate, IsValid, + // GetPointer(void*), GetPointer(const void*), GetArray, GetKey, GetType, + // GetSize, GetCount, GetTypeDesc, MaxSearch, ResetSearchLength, + // SetSearchLength, Move + + bool GetFlag(unsigned int mask) const { + return mFlags & mask; + } + + void SetFlag(unsigned int mask, bool value) { + if (value) { + mFlags |= mask; + } else { + mFlags &= ~mask; + } + } + + void *operator new(std::size_t bytes) { + return AttribAlloc::Allocate(bytes, ""); + } + + void operator delete(void *ptr, std::size_t bytes) { + AttribAlloc::Free(ptr, bytes, ""); + } + void *operator new(std::size_t, void *ptr) { return ptr; } + void *operator new(std::size_t, void *ptr, unsigned int) { + return ptr; + } + + void operator delete(void *ptr) {} + Node() : mKey(0), mTypeIndex(0), mMax(0), mFlags(0), mPtr(this) {} Node(Key key, unsigned int type, void *ptr, bool ptrIsRaw, unsigned char flags, void *layoutptr) @@ -396,20 +460,22 @@ class Node { } } - void Move(Node &src) { - mKey = src.mKey; - mTypeIndex = src.mTypeIndex; - mPtr = src.mPtr; - mFlags = src.mFlags; - - src.mPtr = &src; - src.mFlags = 0; - src.mKey = 0; + const Node &operator=(const Node &rhs) { + mKey = rhs.mKey; + mPtr = 
rhs.mPtr; + mTypeIndex = rhs.mTypeIndex; + mMax = rhs.mMax; + mFlags = rhs.mFlags; + return *this; } - bool GetFlag(unsigned int mask) const { - return mFlags & mask; - } + bool operator==(const Node &rhs) const { return mKey == rhs.mKey; } + bool operator!=(const Node &rhs) const { return mKey != rhs.mKey; } + bool operator<(const Node &rhs) const { return mKey < rhs.mKey; } + + bool operator==(unsigned int rhs) const { return mKey == rhs; } + bool operator!=(unsigned int rhs) const { return mKey != rhs; } + bool operator<(unsigned int rhs) const { return mKey < rhs; } bool RequiresRelease() const { return GetFlag(Flag_RequiresRelease); @@ -439,6 +505,11 @@ class Node { return GetFlag(Flag_IsLocatable); } + void Invalidate() { + mPtr = this; + mKey = 0; + } + bool IsValid() const { return IsLaidOut() || mPtr != this; } @@ -453,6 +524,16 @@ class Node { } } + void *GetPointer(const void *layoutptr) const { + if (IsByValue()) { + return &mValue; + } else if (IsLaidOut()) { + return (void *)(uintptr_t(layoutptr) + uintptr_t(mPtr)); + } else { + return mPtr; + } + } + Array *GetArray(void *layoutptr) const { if (IsLaidOut()) { return (Array *)(uintptr_t(layoutptr) + uintptr_t(mArray)); @@ -461,6 +542,18 @@ class Node { } } + Key GetKey() const { + return IsValid() ? mKey : 0; + } + + unsigned int GetType() const { + return mTypeIndex; + } + + unsigned int GetSize(void *layoutptr) const { + return GetTypeDesc().GetSize(); + } + std::size_t GetCount(void *layoutptr) const { if (IsValid()) { if (IsArray()) { @@ -471,29 +564,31 @@ class Node { return 0; } - Key GetKey() const { - return IsValid() ? 
mKey : 0; + const TypeDesc &GetTypeDesc() const { + return Database::Get().GetIndexedTypeDesc(mTypeIndex); } std::size_t MaxSearch() const { return mMax; } - void SetSearchLength(std::size_t searchLen) { - mMax = std::max(mMax, (unsigned char)searchLen); - } - void ResetSearchLength(std::size_t searchLen) { mMax = searchLen; } - const TypeDesc &GetTypeDesc() const { - return Database::Get().GetIndexedTypeDesc(mTypeIndex); + void SetSearchLength(std::size_t searchLen) { + mMax = std::max(mMax, (unsigned char)searchLen); } - void Invalidate() { - mPtr = this; - mKey = 0; + void Move(Node &src) { + mKey = src.mKey; + mTypeIndex = src.mTypeIndex; + mPtr = src.mPtr; + mFlags = src.mFlags; + + src.mPtr = &src; + src.mFlags = 0; + src.mKey = 0; } private: @@ -514,16 +609,12 @@ class Class { public: class TablePolicy { public: - static std::size_t KeyIndex(std::size_t k, std::size_t tableSize, unsigned int keyShift) { - return RotateNTo32(k, keyShift) % tableSize; - } - - static std::size_t WrapIndex(std::size_t index, std::size_t tableSize, unsigned int keyShift) { - return index % tableSize; + static void *Alloc(std::size_t bytes) { + return TableAllocFunc(bytes); } - static std::size_t TableSize(std::size_t entries) { - return AdjustHashTableSize(entries); + static void Free(void *ptr, std::size_t bytes) { + TableFreeFunc(ptr, bytes); } static std::size_t GrowRequest(std::size_t currententries, bool collisionoverrun) { @@ -534,13 +625,13 @@ class Class { } } - static void *Alloc(std::size_t bytes) { - return TableAllocFunc(bytes); + static std::size_t TableSize(std::size_t entries) { + return AdjustHashTableSize(entries); } - static void Free(void *ptr, std::size_t bytes) { - TableFreeFunc(ptr, bytes); - } + static unsigned int KeyIndex(unsigned int k, unsigned int tableSize, unsigned int keyShift); + + static unsigned int WrapIndex(unsigned int index, unsigned int tableSize, unsigned int keyShift); }; Class(Key k, ClassPrivate &privates); @@ -557,6 +648,7 @@ class 
Class { unsigned int GetNumCollections() const; Key GetFirstCollection() const; Key GetNextCollection(Key prev) const; + void Reserve(unsigned int spaceForAdditionalCollections); void SetTableBuffer(void *fixedAlloc, std::size_t bytes); unsigned int GetTableNodeSize() const; void CopyLayout(void *srcLayout, void *dstLayout) const; diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribClass.cpp b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribClass.cpp index 81714e16e..4d21a0917 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribClass.cpp +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribClass.cpp @@ -13,6 +13,8 @@ Class::~Class() { Database::Get().RemoveClass(this); } +ClassPrivate::CollectionHashMap::~CollectionHashMap() {} + const Definition *Class::GetDefinition(Key key) const { Definition target(key); const Definition *b = mPrivates.mDefinitions; @@ -77,12 +79,16 @@ Key Class::GetNextCollection(Key prev) const { return 0; } +inline void Class::Reserve(unsigned int spaceForAdditionalCollections) { + mPrivates.mCollections.Reserve(mPrivates.mCollections.Size() + spaceForAdditionalCollections); +} + void Class::SetTableBuffer(void *fixedAlloc, std::size_t bytes) { mPrivates.mCollections.SetTableBuffer(fixedAlloc, bytes); } unsigned int Class::GetTableNodeSize() const { - return 12; + return mPrivates.mCollections.GetTableNodeSize(); } void Class::Delete() const { @@ -93,6 +99,7 @@ bool Class::AddCollection(Collection *c) { return mPrivates.mCollections.Add(c->GetKey(), c); } +// NON_MATCHING: 98.6% / 99.6% - best known floor uses the block-scoped late VecHashMap::UpdateSearchLength scan, but the shared searchLen local still lands in r6 instead of r7 bool Class::RemoveCollection(Collection *c) { return mPrivates.mCollections.Remove(c->GetKey()); } @@ -104,7 +111,7 @@ void *Class::AllocLayout() const { void *data = Attrib::Alloc(mPrivates.mLayoutSize, "Attrib::Class"); memset(data, 0, mPrivates.mLayoutSize); - Definition 
*defs = mPrivates.mDefinitions; + const Definition *defs = mPrivates.mDefinitions; for (std::size_t d = 0; d < mPrivates.mNumDefinitions; d++) { if (defs[d].IsArray() && defs[d].InLayout()) { char *arrayptr = (char *)data + defs[d].GetOffset(); // or maybe the offset is added later diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribCollection.cpp b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribCollection.cpp index 8fa2d693b..7bdfef386 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribCollection.cpp +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribCollection.cpp @@ -56,7 +56,7 @@ Collection::Collection(const CollectionLoadData &loadData, Vault *v) : mTable(lo const unsigned int *typeList = loadData.GetTypes(); const CollectionLoadData::AttribEntry *entries = loadData.GetEntries(); - for (std::size_t i = 0; i < loadData.mNumEntries; i++) { + for (unsigned int i = 0; i < loadData.mNumEntries; i++) { const CollectionLoadData::AttribEntry &entry = entries[i]; if (entry.mNodeFlags & Node::Flag_IsByValue) { unsigned int bytes = Database::Get().GetTypeDesc(typeList[entry.mType]).GetSize(); @@ -201,9 +201,10 @@ bool Collection::AddAttribute(Key attributeKey, unsigned int count) { if (Contains(attributeKey)) { return false; } + const Class *c = mClass; bool result = false; unsigned char flags = 0; - const Attrib::Definition *d = mClass->GetDefinition(attributeKey); + const Attrib::Definition *d = c->GetDefinition(attributeKey); if (d) { d->InLayout(); if (d->IsArray()) { diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribDatabase.cpp b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribDatabase.cpp index 6f9d1d145..efc56f466 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribDatabase.cpp +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribDatabase.cpp @@ -1,6 +1,7 @@ #include "../AttribHash.h" #include "../AttribSys.h" #include "AttribPrivate.h" +#include 
"Speed/Indep/Libs/Support/Utility/UMath.h" #include "Speed/Indep/Tools/AttribSys/Runtime/AttribLoadAndGo.h" #include @@ -218,6 +219,7 @@ bool Database::AddClass(Class *c) { return mPrivates.mClasses.Add(c->GetKey(), c); } +// NON_MATCHING: 98.6% / 99.6% - best known floor uses the block-scoped late VecHashMap::UpdateSearchLength scan, but the shared searchLen local still lands in r6 instead of r7 void Database::RemoveClass(const Class *c) { mPrivates.mClasses.Remove(c->GetKey()); } @@ -239,4 +241,8 @@ Key StringToKey(const char *str) { return StringHash32(str); } +TypeTable::~TypeTable() {} + +ClassTable::~ClassTable() {} + }; // namespace Attrib diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribHashMap.h b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribHashMap.h index 66f23f886..8f8ccc52f 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribHashMap.h +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribHashMap.h @@ -18,8 +18,8 @@ class HashMap { return RotateNTo32(k, keyShift) % tableSize; } - static std::size_t WrapIndex(Key k, std::size_t tableSize, unsigned int keyShift) { - return k % tableSize; + static std::size_t WrapIndex(Key index, std::size_t tableSize, unsigned int keyShift) { + return index % tableSize; } static void *Alloc(std::size_t bytes) { @@ -43,10 +43,18 @@ class HashMap { } }; + void *operator new(std::size_t bytes) { + return Alloc(bytes, "Attrib::HashMap"); + } + void operator delete(void *ptr, std::size_t bytes) { Free(ptr, bytes, "Attrib::HashMap"); } + void *operator new(std::size_t, void *ptr) { + return ptr; + } + HashMap(std::size_t reservationSize, unsigned int keyShift, bool exactFit) : mTable(nullptr), mTableSize(0), mNumEntries(0), mWorstCollision(0), mKeyShift(keyShift) { if (reservationSize != 0) { @@ -60,6 +68,36 @@ class HashMap { } } + bool ValidIndex(unsigned int index) const { + return index < mTableSize && mTable[index].IsValid(); + } + + unsigned int GetKeyAtIndex(unsigned int index) 
const { + if (ValidIndex(index)) { + (void)ValidIndex(index); + return mTable[index].GetKey(); + } + return 0; + } + + Node *GetNodeAtIndex(unsigned int index) const { + if (ValidIndex(index)) { + return &mTable[index]; + } + return nullptr; + } + + bool IsArrayAtIndex(unsigned int index) const { + if (ValidIndex(index)) { + return mTable[index].IsArray(); + } + return false; + } + + std::size_t Size() const { + return mNumEntries; + } + bool Add(Key key, unsigned int type, void *ptr, bool ptrIsRaw, unsigned char flags, bool exactFit, void *layoutptr) { if (mNumEntries == mTableSize) { RebuildTable(HashMapTablePolicy::GrowRequest(mTableSize, false)); @@ -80,41 +118,25 @@ class HashMap { } } - void RebuildTable(std::size_t requestedCount) { - if (requestedCount == 0) { - return; - } - std::size_t tableSize = HashMapTablePolicy::TableSize(requestedCount); - Node *oldTable = mTable; - std::size_t oldSize = mTableSize; - mTableSize = tableSize; - mNumEntries = 0; - mWorstCollision = 0; - mTable = new (HashMapTablePolicy::Alloc(mTableSize * sizeof(Node))) Node(); - for (int i = 1; i < mTableSize; i++) { - new (&mTable[i]) Node(); + void *Remove(Node *node, void *layoutptr, bool maintainTableInvariant) { + if (node->IsValid()) { + // useless but needed to match } - if (oldTable) { - for (int i = 0; i < oldSize; i++) { - if (oldTable[i].IsValid()) { - oldTable[i].ResetSearchLength(0); - Transfer(oldTable[i]); - } + Key key = node->GetKey(); + void *result = node->GetPointer(layoutptr); + node->Invalidate(); + mNumEntries--; + + if (maintainTableInvariant) { + std::size_t actualIndex = node - mTable; + std::size_t freedIndex = UpdateSearchLength(HashMapTablePolicy::KeyIndex(key, mTableSize, mKeyShift), actualIndex); + while (freedIndex < mTableSize) { + freedIndex = UpdateSearchLength(freedIndex, freedIndex); } - HashMapTablePolicy::Free(oldTable, oldSize * sizeof(Node)); + } else { + node->ResetSearchLength(0); } - } - - void ClearForRelease() { - mNumEntries = 0; - } - - 
std::size_t Size() const { - return mNumEntries; - } - - bool ValidIndex(unsigned int index) const { - return index < mTableSize && mTable[index].IsValid(); + return result; } std::size_t FindIndex(Key key) const { @@ -124,8 +146,8 @@ class HashMap { Node *table = mTable; unsigned int actualIndex = HashMapTablePolicy::KeyIndex(key, mTableSize, mKeyShift); unsigned int searchLen = 0; - unsigned int maxSearchLen = table[actualIndex].MaxSearch(); - while (searchLen < maxSearchLen && table[actualIndex].GetKey() != key) { + unsigned int maxSearchlen = table[actualIndex].MaxSearch(); + while (searchLen < maxSearchlen && table[actualIndex].GetKey() != key) { if (table[actualIndex].IsValid()) { } actualIndex = HashMapTablePolicy::WrapIndex(actualIndex + 1, mTableSize, 0); @@ -153,76 +175,93 @@ class HashMap { return index; } - Node *GetNodeAtIndex(unsigned int index) const { - // TODO - if (ValidIndex(index)) { - return &mTable[index]; + void RebuildTable(std::size_t requestedCount) { + if (requestedCount == 0) { + return; + } + std::size_t tableSize = HashMapTablePolicy::TableSize(requestedCount); + Node *oldTable = mTable; + std::size_t oldSize = mTableSize; + mTableSize = tableSize; + mNumEntries = 0; + mWorstCollision = 0; + mTable = new (HashMapTablePolicy::Alloc(mTableSize * sizeof(Node))) Node(); + for (int i = 1; i < mTableSize; i++) { + new (&mTable[i]) Node(); + } + if (oldTable) { + for (int i = 0; i < oldSize; i++) { + if (oldTable[i].IsValid()) { + oldTable[i].ResetSearchLength(0); + Transfer(oldTable[i]); + } + } + HashMapTablePolicy::Free(oldTable, oldSize * sizeof(Node)); } - return nullptr; } - unsigned int GetKeyAtIndex(unsigned int index) const { - if (ValidIndex(index)) { - (void)ValidIndex(index); - return mTable[index].GetKey(); - } - return 0; + void Reserve(std::size_t requestedCount) { + RebuildTable(requestedCount); } - void *Remove(Node *node, void *layoutptr, bool maintainTableInvariant) { - if (node->IsValid()) { - // useless but needed to 
match - } - Key key = node->GetKey(); - void *result = node->GetPointer(layoutptr); - node->Invalidate(); - mNumEntries--; + std::size_t Capacity() const { + return mTableSize; + } - if (maintainTableInvariant) { - std::size_t actualIndex = node - mTable; // or directly and actualIndex is used for something else? - std::size_t freedIndex = UpdateSearchLength(HashMapTablePolicy::KeyIndex(key, mTableSize, mKeyShift), actualIndex); - while (freedIndex < mTableSize) { - freedIndex = UpdateSearchLength(freedIndex, freedIndex); - } - } else { - node->ResetSearchLength(0); - } - return result; + std::size_t Count() const { + return mNumEntries; + } + + unsigned short WorstCollision() const { + return mWorstCollision; + } + + unsigned short KeyShift() const { + return mKeyShift; } // UNSOLVED - unsigned int CountSearchCacheLines(Key key, unsigned int lineSize) { + unsigned int CountSearchCacheLines(Key key, unsigned int lineSize) const { unsigned int result = 0; if (mNumEntries == 0 || key == 0) { return result; } unsigned int prevline = 0; + unsigned int currline; Node *table = mTable; unsigned int actualIndex = HashMapTablePolicy::KeyIndex(key, mTableSize, mKeyShift); unsigned int searchLen = 0; - unsigned int maxSearchLen = table[actualIndex].MaxSearch(); - unsigned int currline = (uintptr_t)&table[actualIndex] >> (lineSize & 0x3f); // TODO huh? 
+ unsigned int maxSearchlen = table[actualIndex].MaxSearch(); + currline = (uintptr_t)&table[actualIndex] >> (lineSize & 0x3f); if (currline != 0) { - result = 1; // commenting this out improves the score prevline = currline; + result = 1; } - for (; searchLen < maxSearchLen; searchLen++) { + for (;;) { + if (searchLen >= maxSearchlen) { + break; + } if (table[actualIndex].GetKey() == key) { - return result; + break; } - actualIndex = HashMapTablePolicy::WrapIndex(actualIndex + 1, mTableSize, mKeyShift); - currline = (uintptr_t)&table[actualIndex] >> (lineSize & 0x3f); + actualIndex = HashMapTablePolicy::WrapIndex(actualIndex + 1, mTableSize, 0); + currline = (uintptr_t)&mTable[actualIndex] >> (lineSize & 0x3f); if (currline != prevline) { prevline = currline; result++; } + searchLen++; } return result; } + void ClearForRelease() { + mNumEntries = 0; + } + private: void Transfer(Node &src) { std::size_t searchLen = 0; @@ -287,14 +326,15 @@ class HashMap { std::size_t PreFlightAdd(Key key, std::size_t targetIndex, std::size_t &searchLen) { searchLen = 0; - while (mTable[targetIndex].IsValid()) { - if (mTable[targetIndex].GetKey() == key) { + std::size_t actualIndex = targetIndex; + while (mTable[actualIndex].IsValid()) { + if (mTable[actualIndex].GetKey() == key) { return static_cast(-1); } - targetIndex = HashMapTablePolicy::WrapIndex(targetIndex + 1, mTableSize, 0); + actualIndex = HashMapTablePolicy::WrapIndex(actualIndex + 1, mTableSize, 0); searchLen++; } - return targetIndex; + return actualIndex; } void PostFlightAdd(std::size_t targetIndex, std::size_t searchLen) { diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribLoadAndGo.cpp b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribLoadAndGo.cpp index a3c302400..4923b9caf 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribLoadAndGo.cpp +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribLoadAndGo.cpp @@ -265,9 +265,9 @@ void Vault::Initialize() { break; case 4: { Vault 
*depVault = reinterpret_cast(mDepData[ptr->mIndex].mData); - unsigned int exportIndex = depVault->FindExportID(ptr->mExportID); - if (exportIndex != -1) { - *targetptr = reinterpret_cast(depVault->GetExportData(exportIndex)); + unsigned int exportindex = depVault->FindExportID(ptr->mExportID); + if (exportindex != -1) { + *targetptr = reinterpret_cast(depVault->GetExportData(exportindex)); } else { *targetptr = nullptr; } diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribPrivate.h b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribPrivate.h index d27bcd498..f817e3c99 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribPrivate.h +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/Common/AttribPrivate.h @@ -13,6 +13,14 @@ // Credit: Brawltendo namespace Attrib { +inline unsigned int Class::TablePolicy::KeyIndex(unsigned int k, unsigned int tableSize, unsigned int keyShift) { + return RotateNTo32(k, keyShift) % tableSize; +} + +inline unsigned int Class::TablePolicy::WrapIndex(unsigned int index, unsigned int tableSize, unsigned int keyShift) { + return index % tableSize; +} + // total size: 0x20 class CollectionLoadData { public: @@ -197,6 +205,91 @@ class ClassPrivate : public Class { ~CollectionHashMap(); + unsigned int UpdateSearchLengthSameIndex(unsigned int index) { + unsigned int targetIndex = index; + unsigned int freeIndex = index; + unsigned int currentIndex; + if (targetIndex == freeIndex && mTable[targetIndex].MaxSearch() == 0) { + goto special_case; + } + currentIndex = targetIndex; + goto after_special_case; + special_case: + currentIndex = Class::TablePolicy::WrapIndex(targetIndex + mTableSize - mWorstCollision, mTableSize, 0); + { + unsigned int distance = mWorstCollision; + while (mTable[currentIndex].MaxSearch() < distance && distance > 0) { + currentIndex = Class::TablePolicy::WrapIndex(currentIndex + 1, mTableSize, 0); + distance--; + } + if (distance == 0) { + return static_cast(-1); + } + } + after_special_case: + 
unsigned int maxSearch = mTable[currentIndex].MaxSearch(); + unsigned int worstIndex = Class::TablePolicy::WrapIndex(currentIndex + maxSearch, mTableSize, 0); + targetIndex = currentIndex; + if (mTable[worstIndex].IsValid()) { + Class::TablePolicy::KeyIndex(mTable[worstIndex].Key(), mTableSize, 0); + } + + if (mTable[freeIndex].IsValid()) { + } + + if (freeIndex != worstIndex) { + mTable[freeIndex].Move(mTable[worstIndex]); + } + if (mTable[worstIndex].IsValid()) { + } + + unsigned int newMaxSearch; + { + unsigned int searchLen = 1; + newMaxSearch = 0; + asm("" : : "r"(newMaxSearch)); + for (; searchLen < maxSearch; searchLen++) { + unsigned int loopIndex = Class::TablePolicy::WrapIndex(targetIndex + searchLen, mTableSize, 0); + if (Class::TablePolicy::KeyIndex(mTable[loopIndex].Key(), mTableSize, 0) == targetIndex) { + newMaxSearch = searchLen; + } + } + } + mTable[targetIndex].ResetSearchLength(newMaxSearch); + + if (maxSearch == mWorstCollision && mTable[freeIndex].MaxSearch() < maxSearch && newMaxSearch < maxSearch) { + mWorstCollision = 0; + unsigned int prevWorst; + for (unsigned int i = 0; i < mTableSize && mWorstCollision < maxSearch; i++) { + if (mTable[i].MaxSearch() > mWorstCollision) { + prevWorst = mWorstCollision = mTable[i].MaxSearch(); + } + } + } + + return worstIndex; + } + + Collection *RemoveIndex(unsigned int actualIndex) { + if (!ValidIndex(actualIndex)) { + return nullptr; + } + Collection *result = mTable[actualIndex].Get(); + unsigned int key = mTable[actualIndex].Key(); + mTable[actualIndex].Invalidate(); + mNumEntries--; + + unsigned int freedIndex = UpdateSearchLength(Class::TablePolicy::KeyIndex(key, mTableSize, 0), actualIndex); + for (; freedIndex < mTableSize; freedIndex = UpdateSearchLengthSameIndex(freedIndex)) { + } + return result; + } + + Collection *Remove(unsigned int key) { + unsigned int actualIndex = FindIndex(key); + return RemoveIndex(actualIndex); + } + unsigned int GetNextValidIndex(unsigned int startPoint) const { 
unsigned int index = startPoint + 1; for (; index < mTableSize && !mTable[index].IsValid(); index++) { @@ -243,6 +336,92 @@ class ClassPrivate : public Class { class ClassTable : public VecHashMap { public: ClassTable(std::size_t capacity) : VecHashMap(capacity) {} + ~ClassTable(); + + unsigned int UpdateSearchLengthSameIndex(unsigned int index) { + unsigned int targetIndex = index; + unsigned int freeIndex = index; + unsigned int currentIndex; + if (targetIndex == freeIndex && mTable[targetIndex].MaxSearch() == 0) { + goto special_case; + } + currentIndex = targetIndex; + goto after_special_case; + special_case: + currentIndex = Class::TablePolicy::WrapIndex(targetIndex + mTableSize - mWorstCollision, mTableSize, 0); + { + unsigned int distance = mWorstCollision; + while (mTable[currentIndex].MaxSearch() < distance && distance > 0) { + currentIndex = Class::TablePolicy::WrapIndex(currentIndex + 1, mTableSize, 0); + distance--; + } + if (distance == 0) { + return static_cast(-1); + } + } + after_special_case: + unsigned int maxSearch = mTable[currentIndex].MaxSearch(); + unsigned int worstIndex = Class::TablePolicy::WrapIndex(currentIndex + maxSearch, mTableSize, 0); + targetIndex = currentIndex; + if (mTable[worstIndex].IsValid()) { + Class::TablePolicy::KeyIndex(mTable[worstIndex].Key(), mTableSize, 0); + } + + if (mTable[freeIndex].IsValid()) { + } + + if (freeIndex != worstIndex) { + mTable[freeIndex].Move(mTable[worstIndex]); + } + if (mTable[worstIndex].IsValid()) { + } + + unsigned int newMaxSearch; + { + newMaxSearch = 0; + unsigned int searchLen = 1; + asm("" : : "r"(newMaxSearch)); + for (; searchLen < maxSearch; searchLen++) { + unsigned int loopIndex = Class::TablePolicy::WrapIndex(targetIndex + searchLen, mTableSize, 0); + if (Class::TablePolicy::KeyIndex(mTable[loopIndex].Key(), mTableSize, 0) == targetIndex) { + newMaxSearch = searchLen; + } + } + } + mTable[targetIndex].ResetSearchLength(newMaxSearch); + + if (maxSearch == mWorstCollision && 
mTable[freeIndex].MaxSearch() < maxSearch && newMaxSearch < maxSearch) { + mWorstCollision = 0; + unsigned int prevWorst; + for (unsigned int i = 0; i < mTableSize && mWorstCollision < maxSearch; i++) { + if (mTable[i].MaxSearch() > mWorstCollision) { + prevWorst = mWorstCollision = mTable[i].MaxSearch(); + } + } + } + + return worstIndex; + } + + Class *RemoveIndex(unsigned int actualIndex) { + if (!ValidIndex(actualIndex)) { + return nullptr; + } + Class *result = mTable[actualIndex].Get(); + unsigned int key = mTable[actualIndex].Key(); + mTable[actualIndex].Invalidate(); + mNumEntries--; + + unsigned int freedIndex = UpdateSearchLength(Class::TablePolicy::KeyIndex(key, mTableSize, 0), actualIndex); + for (; freedIndex < mTableSize; freedIndex = UpdateSearchLengthSameIndex(freedIndex)) { + } + return result; + } + + Class *Remove(unsigned int key) { + unsigned int actualIndex = FindIndex(key); + return RemoveIndex(actualIndex); + } void operator delete(void *ptr, std::size_t bytes) { Free(ptr, bytes, "Attrib::ClassTable"); diff --git a/src/Speed/Indep/Tools/AttribSys/Runtime/VecHashMap64.h b/src/Speed/Indep/Tools/AttribSys/Runtime/VecHashMap64.h index 644670807..e0314aa65 100644 --- a/src/Speed/Indep/Tools/AttribSys/Runtime/VecHashMap64.h +++ b/src/Speed/Indep/Tools/AttribSys/Runtime/VecHashMap64.h @@ -8,15 +8,10 @@ #include "Speed/Indep/Tools/AttribSys/Runtime/AttribSys.h" // total size: 0x10 -template class VecHashMap { - public: +template class VecHashMap { // total size: 0xC or 0x10 class Node { public: - void *operator new(std::size_t, void *place) { - return place; - } - Node() : mKey(0), mPtr(reinterpret_cast(this)), mMax(0) {} Node(KeyType key, T *ptr) : mKey(key), mPtr(ptr) {} @@ -39,10 +34,7 @@ template Unk3) { - RebuildTable(Policy::GrowRequest(mTableSize, true)); - } - return result; + ~VecHashMap() { + Clear(); } - T *RemoveIndex(std::size_t actualIndex) { - if (!ValidIndex(actualIndex)) { - return nullptr; + bool InternalAdd(KeyType key, T *ptr) { 
+ if (mNumEntries == mTableSize) { + RebuildTable(Policy::GrowRequest(mTableSize, false)); } - T *result = mTable[actualIndex].Get(); - KeyType key = mTable[actualIndex].Key(); - mTable[actualIndex].Invalidate(); - mNumEntries--; + std::size_t targetIndex = Policy::KeyIndex(key, mTableSize, 0); + std::size_t actualIndex = targetIndex; + std::size_t searchLen = 0; + std::size_t tableSize = mTableSize; + while (mTable[actualIndex].IsValid()) { + if (mTable[actualIndex].Key() == key) { + return false; + } + searchLen++; + actualIndex = Policy::WrapIndex(actualIndex + 1, tableSize, 0); + } + if (actualIndex * sizeof(Node) + (uintptr_t)mTable) { + new (&mTable[actualIndex]) Node(key, ptr); + } + mTable[targetIndex].SetSearchLength(searchLen); + if (searchLen > mWorstCollision) { + mWorstCollision = searchLen; + } + mNumEntries++; + return true; + } - std::size_t freedIndex = UpdateSearchLength(Policy::KeyIndex(key, mTableSize, 0), actualIndex); - // TODO UNSOLVED making it a while loop changes the output, in AttribHashMap.h it's a while loop - for (; freedIndex < mTableSize; freedIndex = UpdateSearchLength(freedIndex, freedIndex)) { + void CopyFromOldTable(Node *oldTable, std::size_t oldSize, bool needFree) { + for (std::size_t i = 0; i < mTableSize; i++) { + // TODO UNSOLVED + if (i * sizeof(Node) + (uintptr_t)mTable) { + new (&mTable[i]) Node(); + } + } + if (!oldTable) { + return; + } + for (std::size_t i = 0; i < oldSize; i++) { + if (oldTable[i].IsValid()) { + InternalAdd(oldTable[i].Key(), oldTable[i].Get()); + } + } + if (needFree) { + Policy::Free(oldTable, oldSize * sizeof(Node)); } - return result; } - T *Remove(KeyType key) { - std::size_t actualIndex = FindIndex(key); - return RemoveIndex(actualIndex); + void RebuildTable(std::size_t requestedCount) { + if (requestedCount != 0) { + requestedCount--; + do { + requestedCount++; + std::size_t tableSize = Policy::TableSize(requestedCount); + Node *oldTable = mTable; + std::size_t oldSize = mTableSize; + + 
mTableSize = tableSize; + mNumEntries = 0; + mWorstCollision = 0; + mTable = reinterpret_cast(Policy::Alloc(mTableSize * sizeof(Node))); + CopyFromOldTable(oldTable, oldSize, true); + } while (mWorstCollision > Unk3); + } } - std::size_t FindIndex(KeyType key) const { + unsigned int FindIndex(KeyType key) const { if (mNumEntries == 0) { return mTableSize; } Node *table = mTable; - std::size_t actualIndex = Policy::KeyIndex(key, mTableSize, 0); - std::size_t searchLen = 0; - std::size_t maxSearchLen = table[actualIndex].MaxSearch(); - while (searchLen < maxSearchLen && table[actualIndex].Key() != key) { - // TODO why is there a Node::IsValid call somewhere here? + unsigned int actualIndex = Policy::KeyIndex(key, mTableSize, 0); + unsigned int searchLen = 0; + unsigned int maxSearchlen = table[actualIndex].MaxSearch(); + while (searchLen < maxSearchlen && table[actualIndex].Key() != key) { if (table[actualIndex].IsValid()) { } actualIndex = Policy::WrapIndex(actualIndex + 1, mTableSize, 0); @@ -151,7 +169,7 @@ template mWorstCollision) { - mWorstCollision = searchLen; + std::size_t GetTableNodeSize() const { + return sizeof(Node); + } + + bool Add(KeyType key, T *ptr) { + bool result = InternalAdd(key, ptr); + if (mWorstCollision > Unk3) { + RebuildTable(Policy::GrowRequest(mTableSize, true)); } - mNumEntries++; - return true; + return result; } - // TODO might this be faulty? 
- std::size_t UpdateSearchLength(std::size_t targetIndex, std::size_t freeIndex) { + unsigned int UpdateSearchLength(unsigned int targetIndex, unsigned int freeIndex) { if (targetIndex == freeIndex && mTable[targetIndex].MaxSearch() == 0) { targetIndex = Policy::WrapIndex(targetIndex + mTableSize - mWorstCollision, mTableSize, 0); - std::size_t distance = mWorstCollision; + unsigned int distance = mWorstCollision; while (mTable[targetIndex].MaxSearch() < distance && distance > 0) { targetIndex = Policy::WrapIndex(targetIndex + 1, mTableSize, 0); distance--; } if (distance == 0) { - return static_cast(-1); + return static_cast(-1); } } - std::size_t maxSearch = mTable[targetIndex].MaxSearch(); - std::size_t worstIndex = Policy::WrapIndex(targetIndex + maxSearch, mTableSize, 0); + unsigned int maxSearch = mTable[targetIndex].MaxSearch(); + unsigned int worstIndex = Policy::WrapIndex(targetIndex + maxSearch, mTableSize, 0); if (mTable[worstIndex].IsValid()) { Policy::KeyIndex(mTable[worstIndex].Key(), mTableSize, 0); } - if (freeIndex != worstIndex && mTable[freeIndex].IsValid()) { + if (mTable[freeIndex].IsValid()) { + } + + if (freeIndex != worstIndex) { mTable[freeIndex].Move(mTable[worstIndex]); } if (mTable[worstIndex].IsValid()) { } - std::size_t newMaxSearch = 0; - for (std::size_t searchLen = 1; searchLen < maxSearch; searchLen++) { - std::size_t index = Policy::WrapIndex(targetIndex + searchLen, mTableSize, 0); - if (Policy::KeyIndex(mTable[index].Key(), mTableSize, 0) == targetIndex) { - newMaxSearch = searchLen; + unsigned int newMaxSearch; + { + if (Unk2) { + newMaxSearch = 0; + } + unsigned int searchLen = 1; + asm("" : : "r"(targetIndex)); + if (!Unk2) { + newMaxSearch = 0; + } + for (; searchLen < maxSearch; searchLen++) { + unsigned int index = Policy::WrapIndex(targetIndex + searchLen, mTableSize, 0); + if (Policy::KeyIndex(mTable[index].Key(), mTableSize, 0) == targetIndex) { + newMaxSearch = searchLen; + } } } @@ -252,8 +271,8 @@ template 
mWorstCollision) { prevWorst = mWorstCollision = mTable[i].MaxSearch(); } @@ -263,49 +282,45 @@ template (Policy::Alloc(mTableSize * sizeof(Node))); - CopyFromOldTable(oldTable, oldSize, true); - } while (mWorstCollision > Unk3); + std::size_t GetNextValidIndex(std::size_t startPoint) const { + std::size_t index = startPoint + 1; + for (; index < mTableSize && !mTable[index].IsValid(); index++) { + } + return index; + } + + KeyType GetKeyAtIndex(std::size_t index) const { + if (ValidIndex(index)) { + (void)ValidIndex(index); + return mTable[index].Key(); } + return 0; } - // TODO private protected: Node *mTable; // offset 0x0, size 0x4 - std::size_t mTableSize; // offset 0x4, size 0x4 - std::size_t mNumEntries; // offset 0x8, size 0x4 + unsigned int mTableSize; // offset 0x4, size 0x4 + unsigned int mNumEntries; // offset 0x8, size 0x4 unsigned int mFixedAlloc : 1; // offset 0xC, size 0x4 unsigned int mWorstCollision : 31; // offset 0xC, size 0x4 }; diff --git a/tools/build_matrix.py b/tools/build_matrix.py new file mode 100644 index 000000000..135bce18c --- /dev/null +++ b/tools/build_matrix.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 + +""" +Run sequential build checks across supported platforms. 
+ +Examples: + python tools/build_matrix.py + python tools/build_matrix.py --version GOWE69 --version SLES-53558-A124 + python tools/build_matrix.py --all-source +""" + +import argparse +import os +import subprocess +import sys +import time +from dataclasses import dataclass +from typing import List, Optional, Sequence + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +ROOT_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..")) +DEFAULT_RESTORE_VERSION = "GOWE69" + + +@dataclass(frozen=True) +class PlatformCheck: + version: str + label: str + required_assets: Sequence[str] + + +@dataclass +class StepResult: + name: str + command: List[str] + returncode: int + elapsed: float + log_path: str + output: str + + @property + def ok(self) -> bool: + return self.returncode == 0 + + +@dataclass +class PlatformResult: + platform: PlatformCheck + configure: Optional[StepResult] = None + build: Optional[StepResult] = None + preflight_error: Optional[str] = None + + @property + def ok(self) -> bool: + return ( + self.preflight_error is None + and self.configure is not None + and self.configure.ok + and self.build is not None + and self.build.ok + ) + + +PLATFORMS = ( + PlatformCheck( + version="GOWE69", + label="GameCube", + required_assets=("orig/GOWE69/NFSMWRELEASE.ELF",), + ), + PlatformCheck( + version="EUROPEGERMILESTONE", + label="Xbox 360", + required_assets=("orig/EUROPEGERMILESTONE/NfsMWEuropeGerMilestone.xex",), + ), + PlatformCheck( + version="SLES-53558-A124", + label="PS2", + required_assets=("orig/SLES-53558-A124/NFS.ELF",), + ), +) + +PLATFORM_BY_VERSION = {platform.version: platform for platform in PLATFORMS} + + +def print_section(title: str) -> None: + print(f"\n== {title} ==", flush=True) + + +def tail_lines(text: str, count: int) -> str: + lines = text.rstrip().splitlines() + if len(lines) <= count: + return "\n".join(lines) + return "\n".join(lines[-count:]) + + +def run_logged(command: List[str], log_path: str) -> StepResult: + start = 
time.monotonic() + try: + completed = subprocess.run( + command, + cwd=ROOT_DIR, + capture_output=True, + text=True, + errors="replace", + ) + output = completed.stdout + if completed.stderr: + if output and not output.endswith("\n"): + output += "\n" + output += completed.stderr + returncode = completed.returncode + except OSError as exc: + output = str(exc) + returncode = 127 + elapsed = time.monotonic() - start + + os.makedirs(os.path.dirname(log_path), exist_ok=True) + with open(log_path, "w", encoding="utf-8") as log_file: + log_file.write(output) + + return StepResult( + name=os.path.basename(log_path), + command=command, + returncode=returncode, + elapsed=elapsed, + log_path=log_path, + output=output, + ) + + +def missing_assets(platform: PlatformCheck) -> List[str]: + missing = [] + for rel_path in platform.required_assets: + abs_path = os.path.join(ROOT_DIR, rel_path) + if not os.path.exists(abs_path): + missing.append(rel_path) + return missing + + +def describe_failure(step: StepResult, tail_count: int) -> None: + print(f"FAIL {step.name}: exit {step.returncode} in {step.elapsed:.2f}s") + print(f"Command: {' '.join(step.command)}") + print(f"Log: {step.log_path}") + if step.output.strip(): + print("--- output tail ---") + print(tail_lines(step.output, tail_count)) + + +def run_platform( + platform: PlatformCheck, build_target: Optional[str], jobs: int, tail_count: int +) -> PlatformResult: + result = PlatformResult(platform=platform) + logs_dir = os.path.join(ROOT_DIR, "build", platform.version, "logs") + + print_section(f"{platform.label} ({platform.version})") + + missing = missing_assets(platform) + if missing: + result.preflight_error = ( + "Missing required assets: " + + ", ".join(missing) + + " (hint: seed shared assets or run worktree bootstrap first)" + ) + print(f"FAIL preflight: {result.preflight_error}") + return result + + configure_cmd = [sys.executable, "configure.py", "--version", platform.version] + configure_log = os.path.join(logs_dir, 
"build-matrix-configure.log") + print(f"RUN configure: {' '.join(configure_cmd)}") + result.configure = run_logged(configure_cmd, configure_log) + if result.configure.ok: + print(f"OK configure: {result.configure.elapsed:.2f}s ({configure_log})") + else: + describe_failure(result.configure, tail_count) + return result + + build_cmd = ["ninja", "-j", str(jobs)] + if build_target is not None: + build_cmd.append(build_target) + build_name = build_target or "default" + build_log = os.path.join(logs_dir, f"build-matrix-{build_name}.log") + print(f"RUN build: {' '.join(build_cmd)}") + result.build = run_logged(build_cmd, build_log) + if result.build.ok: + print(f"OK build: {result.build.elapsed:.2f}s ({build_log})") + else: + describe_failure(result.build, tail_count) + + return result + + +def restore_version(version: str, tail_count: int) -> bool: + print_section(f"Restore {version}") + log_path = os.path.join(ROOT_DIR, "build", version, "logs", "build-matrix-restore.log") + step = run_logged([sys.executable, "configure.py", "--version", version], log_path) + if step.ok: + print(f"OK restore: {step.elapsed:.2f}s ({log_path})") + return True + + describe_failure(step, tail_count) + return False + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Check sequential builds across all supported platforms." 
+ ) + parser.add_argument( + "--version", + dest="versions", + action="append", + choices=sorted(PLATFORM_BY_VERSION.keys()), + help="Limit the run to one or more versions (default: all platforms).", + ) + parser.add_argument( + "--all-source", + action="store_true", + help="Run `ninja all_source` instead of the default full `ninja`.", + ) + parser.add_argument( + "--jobs", + type=int, + default=1, + help="Parallelism passed to ninja (default: 1).", + ) + parser.add_argument( + "--tail", + type=int, + default=40, + help="How many output lines to print when a step fails (default: 40).", + ) + parser.add_argument( + "--restore-version", + default=DEFAULT_RESTORE_VERSION, + choices=sorted(PLATFORM_BY_VERSION.keys()), + help=f"Version to restore at the end (default: {DEFAULT_RESTORE_VERSION}).", + ) + parser.add_argument( + "--no-restore", + action="store_true", + help="Leave the worktree configured for the last checked version.", + ) + return parser.parse_args() + + +def print_summary( + results: Sequence[PlatformResult], restore_version_name: str, restore_ok: Optional[bool] +) -> None: + print_section("Summary") + for result in results: + if result.preflight_error is not None: + print(f"FAIL {result.platform.version}: {result.preflight_error}") + continue + if result.configure is None or not result.configure.ok: + assert result.configure is not None + print( + f"FAIL {result.platform.version}: configure exit {result.configure.returncode} " + f"({result.configure.elapsed:.2f}s)" + ) + continue + if result.build is None or not result.build.ok: + assert result.build is not None + print( + f"FAIL {result.platform.version}: build exit {result.build.returncode} " + f"({result.build.elapsed:.2f}s)" + ) + continue + total = result.configure.elapsed + result.build.elapsed + print(f"OK {result.platform.version}: {total:.2f}s") + + if restore_ok is not None: + status = "OK" if restore_ok else "FAIL" + print(f"{status:4} restore: {restore_version_name}") + + +args = parse_args() 
+ + +def main() -> int: + selected_versions = args.versions or [platform.version for platform in PLATFORMS] + platforms = [PLATFORM_BY_VERSION[version] for version in selected_versions] + build_target = "all_source" if args.all_source else None + results: List[PlatformResult] = [] + restore_ok: Optional[bool] = None + + print(f"Root: {ROOT_DIR}") + print(f"Build target: {build_target or 'ninja default'}") + + try: + for platform in platforms: + results.append(run_platform(platform, build_target, args.jobs, args.tail)) + finally: + if not args.no_restore: + restore_ok = restore_version(args.restore_version, args.tail) + + print_summary(results, args.restore_version, restore_ok) + + if restore_ok is False: + return 1 + if any(not result.ok for result in results): + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/code_style.py b/tools/code_style.py index ecb85f713..61c3f2ee3 100644 --- a/tools/code_style.py +++ b/tools/code_style.py @@ -84,6 +84,7 @@ class Finding: ) USING_NAMESPACE_PATTERN = re.compile(r"^\s*using\s+namespace\b") NULL_PATTERN = re.compile(r"\bNULL\b") +BARE_PRESENCE_IF_PATTERN = re.compile(r"^\s*#if\s+([A-Za-z_][A-Za-z0-9_]*)\s*$") HEADER_GUARD_IFNDEF_PATTERN = re.compile(r"^\s*#ifndef\s+[A-Za-z0-9_]+\s*$", re.MULTILINE) HEADER_GUARD_DEFINE_PATTERN = re.compile(r"^\s*#define\s+[A-Za-z0-9_]+\s*$", re.MULTILINE) EA_PRAGMA_BLOCK_PATTERN = re.compile( @@ -92,6 +93,16 @@ class Finding: r".*?^\s*#endif\s*$", re.MULTILINE | re.DOTALL, ) +EA_PRAGMA_IFDEF_PATTERN = re.compile( + r"^\s*#ifdef\s+EA_PRAGMA_ONCE_SUPPORTED\s*$", re.MULTILINE +) +RECOVERED_LAYOUT_COMMENT_PATTERN = re.compile( + r"//\s*offset 0x[0-9A-Fa-f]+,\s*size 0x[0-9A-Fa-f]+" +) +RECOVERED_NARROW_UNSIGNED_PATTERN = re.compile(r"\bunsigned\s+(char|short)\b") +BARE_RECOVERY_MARKER_PATTERN = re.compile( + r"//\s*(TODO|UNSOLVED|STRIPPED)\b(?:\s*[.:,-]*)?\s*$" +) SUSPICIOUS_MEMBER_PATTERN = re.compile( r"^(?:" r"_?pad(?:ding)?[0-9A-Fa-f_]*" @@ -441,6 +452,16 
@@ def audit_style_guide_rules( if touched_lines is not None and idx not in touched_lines: continue stripped = line.strip() + bare_recovery_marker_match = BARE_RECOVERY_MARKER_PATTERN.search(line) + if bare_recovery_marker_match is not None: + findings.append( + Finding( + path, + idx, + "INFO", + f"`// {bare_recovery_marker_match.group(1)}` has no context; add a short reason or remove the stale recovery marker", + ) + ) if stripped.startswith("//"): continue @@ -471,9 +492,35 @@ def audit_style_guide_rules( "use `nullptr` instead of `NULL`", ) ) + bare_presence_if_match = BARE_PRESENCE_IF_PATTERN.match(line) + if bare_presence_if_match is not None: + findings.append( + Finding( + path, + idx, + "WARN", + f"bare `#if {bare_presence_if_match.group(1)}` looks like a presence check; prefer `#ifdef {bare_presence_if_match.group(1)}` unless a numeric test is intentional", + ) + ) + narrow_type_match = RECOVERED_NARROW_UNSIGNED_PATTERN.search(line) + if ( + narrow_type_match is not None + and RECOVERED_LAYOUT_COMMENT_PATTERN.search(line) is not None + ): + preferred = "uint8" if narrow_type_match.group(1) == "char" else "uint16" + findings.append( + Finding( + path, + idx, + "INFO", + f"recovered layout member uses `{narrow_type_match.group(0)}`; prefer explicit-width `{preferred}` when the field width is known", + ) + ) if ext in HEADER_EXTS: - should_check_guard = touched_lines is None or any(line_no <= 8 for line_no in touched_lines) + should_check_guard = touched_lines is None or any( + line_no <= 12 for line_no in touched_lines + ) if should_check_guard: has_ifndef = HEADER_GUARD_IFNDEF_PATTERN.search(text) is not None has_define = HEADER_GUARD_DEFINE_PATTERN.search(text) is not None @@ -487,6 +534,20 @@ def audit_style_guide_rules( "header guard should use `#ifndef` / `#define` plus the `EA_PRAGMA_ONCE_SUPPORTED` `#pragma once` block", ) ) + pragma_ifdef_match = EA_PRAGMA_IFDEF_PATTERN.search(text) + if pragma_ifdef_match is not None: + pragma_ifdef_line = text[: 
pragma_ifdef_match.start()].count("\n") + 1 + for idx, line in enumerate(text.splitlines(), 1): + if line.strip().startswith("#include ") and idx < pragma_ifdef_line: + findings.append( + Finding( + path, + idx, + "WARN", + "header include appears before the `EA_PRAGMA_ONCE_SUPPORTED` block; keep the guard / pragma block ahead of includes", + ) + ) + break return findings diff --git a/tools/decomp-workflow.py b/tools/decomp-workflow.py index 84f2f83d5..78e7fee04 100644 --- a/tools/decomp-workflow.py +++ b/tools/decomp-workflow.py @@ -16,12 +16,15 @@ python tools/decomp-workflow.py function -u main/Speed/Indep/SourceLists/zCamera -f UpdateAll --no-source python tools/decomp-workflow.py diff -u main/Speed/Indep/SourceLists/zCamera -d UpdateAll --reloc-diffs all python tools/decomp-workflow.py dwarf -u main/Speed/Indep/SourceLists/zCamera -f UpdateAll + python tools/decomp-workflow.py dwarf-scan -u main/Speed/Indep/SourceLists/zCamera + python tools/decomp-workflow.py dwarf-scan -u main/Speed/Indep/SourceLists/zCamera --objdiff-status match python tools/decomp-workflow.py dwarf -u main/Speed/Indep/SourceLists/zAttribSys -f 'Attrib::Class::RemoveCollection(Attrib::Collection *)' --full-diff python tools/decomp-workflow.py verify -u main/Speed/Indep/SourceLists/zCamera -f UpdateAll python tools/decomp-workflow.py unit -u main/Speed/Indep/SourceLists/zCamera """ import argparse +import difflib import json import re import os @@ -45,6 +48,8 @@ make_abs, run_objdiff_json, ) +from lookup import _candidate_func_names, _sig_contains_name, read_text, split_functions +from split_dwarf_info import apply_umath_fixups SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -56,24 +61,51 @@ PS2_SYMBOLS = os.path.join(ROOT_DIR, "config", "SLES-53558-A124", "symbols.txt") GC_DWARF = os.path.join(ROOT_DIR, "symbols", "Dwarf") DEBUG_LINES = os.path.join(ROOT_DIR, "symbols", "debug_lines.txt") +X360_COMPILER_DIR = os.path.join(ROOT_DIR, "build", "compilers", "X360", "14.00.2110") 
+PS2_COMPILER_DIR = os.path.join(ROOT_DIR, "build", "compilers", "PS2", "ee-gcc2.9-991111") +MIPS_BINUTILS_DIR = os.path.join(ROOT_DIR, "build", "mips_binutils") DEFAULT_SMOKE_UNIT = "main/Speed/Indep/SourceLists/zCamera" DEBUG_SYMBOL_PROBE_MANGLED = "UpdateAll__6Cameraf" DEBUG_SYMBOL_PROBE_DEMANGLED = "Camera::UpdateAll(float)" DEBUG_SYMBOL_PROBE_GC_ADDR = "0x80065A84" REBUILT_DEBUG_LINE_RE = re.compile(r"^\s*([0-9A-Fa-f]+)\s*:") +DWARF_HEX_RE = re.compile(r"0x[0-9A-Fa-f]+") LOW_MATCH_PRIORITY_THRESHOLD = 60.0 VERY_LOW_MATCH_PRIORITY_THRESHOLD = 40.0 HIGH_MATCH_CLEANUP_THRESHOLD = 85.0 VERY_HIGH_MATCH_CLEANUP_THRESHOLD = 95.0 +FunctionBlock = Tuple[str, str, str, str] SHARED_ASSET_REQUIREMENTS = [ (os.path.join("build", "tools"), "downloaded tooling"), (os.path.join("orig", "GOWE69", "NFSMWRELEASE.ELF"), "GameCube original ELF"), + ( + os.path.join("orig", "EUROPEGERMILESTONE", "NfsMWEuropeGerMilestone.xex"), + "Xbox original XEX", + ), (os.path.join("orig", "SLES-53558-A124", "NFS.ELF"), "PS2 original ELF"), (os.path.join("symbols", "Dwarf"), "DWARF dump"), ] +PLATFORM_BUILD_REQUIREMENTS = [ + ( + "x360-compiler", + X360_COMPILER_DIR, + "missing (seed build/compilers in this worktree for Xbox builds)", + ), + ( + "ps2-compiler", + PS2_COMPILER_DIR, + "missing (seed build/compilers in this worktree for PS2 builds)", + ), + ( + "ps2-binutils", + MIPS_BINUTILS_DIR, + "missing (seed build/mips_binutils in this worktree for PS2 builds)", + ), +] + class WorkflowError(RuntimeError): pass @@ -292,6 +324,14 @@ def choose_objdiff_row(unit_name: str, function_name: str, reloc_diffs: str = "n return matches[0] +def resolve_exact_function_name( + unit_name: str, function_name: str, reloc_diffs: str = "none" +) -> str: + return str( + choose_objdiff_row(unit_name, function_name, reloc_diffs=reloc_diffs)["name"] + ) + + def load_dwarf_report( unit_name: str, function_name: str, @@ -307,6 +347,355 @@ def load_dwarf_report( raise WorkflowError(f"dwarf-compare.py returned invalid 
JSON: {e}") +def load_dwarf_blocks( + path: str, folder_mode: bool, apply_split_fixups_in_ram: bool = False +) -> List[FunctionBlock]: + if folder_mode: + text = read_text(os.path.join(path, "functions.nothpp")) + else: + text = read_text(path) + if apply_split_fixups_in_ram: + text = apply_umath_fixups(text) + return split_functions(text) + + +def find_dwarf_function_blocks( + funcs: Sequence[FunctionBlock], query: str +) -> List[FunctionBlock]: + candidates = _candidate_func_names(query) + exact_matches: List[FunctionBlock] = [] + fuzzy_matches: List[FunctionBlock] = [] + + for func in funcs: + sig_line = func[2] + if sig_line == query: + exact_matches.append(func) + elif any(_sig_contains_name(sig_line, candidate) for candidate in candidates): + fuzzy_matches.append(func) + + if exact_matches: + return exact_matches + return fuzzy_matches + + +def choose_dwarf_function_block( + funcs: Sequence[FunctionBlock], query: str, label: str +) -> FunctionBlock: + matches = find_dwarf_function_blocks(funcs, query) + if not matches: + raise WorkflowError(f"{label}: function '{query}' not found.") + if len(matches) > 1: + preview = "\n".join(f" - {match[2]}" for match in matches[:8]) + extra = "" + if len(matches) > 8: + extra = f"\n ... 
{len(matches) - 8} more" + raise WorkflowError( + f"{label}: function query '{query}' matched multiple DWARF blocks.\n" + f"Use a more specific function name.\n{preview}{extra}" + ) + return matches[0] + + +def normalize_dwarf_line(line: str) -> str: + stripped = line.rstrip("\n").rstrip() + if stripped.startswith("// Range:"): + return "// Range: " + return DWARF_HEX_RE.sub("0xADDR", stripped) + + +def normalize_dwarf_block(block: str) -> List[str]: + return [normalize_dwarf_line(line) for line in block.splitlines()] + + +def count_dwarf_opcodes( + opcodes: Sequence[Tuple[str, int, int, int, int]] +) -> Dict[str, int]: + matching = 0 + original_only = 0 + rebuilt_only = 0 + changed_groups = 0 + for tag, i1, i2, j1, j2 in opcodes: + if tag == "equal": + matching += i2 - i1 + continue + changed_groups += 1 + if tag in ("replace", "delete"): + original_only += i2 - i1 + if tag in ("replace", "insert"): + rebuilt_only += j2 - j1 + return { + "matching_lines": matching, + "original_only_lines": original_only, + "rebuilt_only_lines": rebuilt_only, + "changed_groups": changed_groups, + } + + +def build_dwarf_scan_row( + row: Dict[str, Any], + original_funcs: Sequence[FunctionBlock], + rebuilt_funcs: Sequence[FunctionBlock], +) -> Dict[str, Any]: + function_name = str(row["name"]) + result: Dict[str, Any] = { + "function": function_name, + "symbol_name": row["symbol_name"], + "objdiff_status": row["status"], + "objdiff_match_percent": row["match_percent"], + "unmatched_bytes_est": row["unmatched_bytes_est"], + "size": row["size"], + } + + try: + original_block = choose_dwarf_function_block( + original_funcs, function_name, "original DWARF" + ) + rebuilt_block = choose_dwarf_function_block( + rebuilt_funcs, function_name, "rebuilt DWARF" + ) + original_lines = normalize_dwarf_block(original_block[3]) + rebuilt_lines = normalize_dwarf_block(rebuilt_block[3]) + matcher = difflib.SequenceMatcher(a=original_lines, b=rebuilt_lines) + counts = 
count_dwarf_opcodes(matcher.get_opcodes()) + total_lines = max(len(original_lines), len(rebuilt_lines), 1) + result.update( + { + "dwarf_status": "exact" + if original_lines == rebuilt_lines + else "mismatch", + "dwarf_match_percent": 100.0 * counts["matching_lines"] / total_lines, + "changed_groups": counts["changed_groups"], + "matching_lines": counts["matching_lines"], + "total_lines": total_lines, + "original_line_count": len(original_lines), + "rebuilt_line_count": len(rebuilt_lines), + "signature_match": normalize_dwarf_line(original_block[2]) + == normalize_dwarf_line(rebuilt_block[2]), + } + ) + except WorkflowError as e: + result.update( + { + "dwarf_status": "error", + "dwarf_match_percent": None, + "changed_groups": None, + "matching_lines": None, + "total_lines": None, + "original_line_count": None, + "rebuilt_line_count": None, + "signature_match": None, + "error": str(e), + } + ) + return result + + +def filter_dwarf_scan_rows( + rows: Sequence[Dict[str, Any]], dwarf_status: str +) -> List[Dict[str, Any]]: + if dwarf_status == "all": + return list(rows) + if dwarf_status == "problem": + return [row for row in rows if row["dwarf_status"] in ("mismatch", "error")] + return [row for row in rows if row["dwarf_status"] == dwarf_status] + + +def filter_dwarf_signature_rows( + rows: Sequence[Dict[str, Any]], signature_status: str +) -> List[Dict[str, Any]]: + if signature_status == "all": + return list(rows) + want_match = signature_status == "match" + return [ + row + for row in rows + if row.get("signature_match") is not None + and bool(row["signature_match"]) == want_match + ] + + +def sort_dwarf_scan_rows(rows: List[Dict[str, Any]]) -> None: + status_rank = {"error": 0, "mismatch": 1, "exact": 2} + rows.sort( + key=lambda row: ( + status_rank.get(str(row["dwarf_status"]), 3), + row["dwarf_match_percent"] + if row["dwarf_match_percent"] is not None + else -1.0, + 0 + if row.get("signature_match") is True + else 1 + if row.get("signature_match") is False + 
else 2, + -(row["changed_groups"] or 0), + -(row["unmatched_bytes_est"] or 0), + row["objdiff_match_percent"] + if row["objdiff_match_percent"] is not None + else -1.0, + row["function"].lower(), + ) + ) + + +def command_dwarf_scan(args: argparse.Namespace) -> None: + ensure_decomp_prereqs() + if not args.json: + print_section(f"DWARF Scan: {args.unit}") + ensure_shared_unit_output(args.unit) + + rebuilt_dwarf_path = ( + os.path.abspath(args.rebuilt_dwarf_file) if args.rebuilt_dwarf_file else None + ) + cleanup_rebuilt_dwarf = False + try: + if not rebuilt_dwarf_path: + rebuilt_dwarf_path = dtk_dwarf_dump(get_unit_build_output(args.unit)) + cleanup_rebuilt_dwarf = True + + data = run_objdiff_json( + OBJDIFF_CLI, + args.unit, + reloc_diffs=args.reloc_diffs, + root_dir=ROOT_DIR, + ) + rows = [ + row + for row in build_objdiff_symbol_rows(data) + if row["type"] == "function" and row["side"] == "left" + ] + if args.objdiff_status != "all": + rows = [row for row in rows if row["status"] == args.objdiff_status] + if args.search: + rows = [ + row + for row in rows + if fuzzy_match(args.search, row["name"]) + or fuzzy_match(args.search, row["symbol_name"]) + ] + if not rows: + raise WorkflowError("No functions match the given filters.") + + original_funcs = load_dwarf_blocks(GC_DWARF, folder_mode=True) + rebuilt_funcs = load_dwarf_blocks( + rebuilt_dwarf_path, folder_mode=False, apply_split_fixups_in_ram=True + ) + scan_rows = [ + build_dwarf_scan_row(row, original_funcs, rebuilt_funcs) for row in rows + ] + + summary = { + "scanned_functions": len(scan_rows), + "exact_functions": sum( + 1 for row in scan_rows if row["dwarf_status"] == "exact" + ), + "mismatch_functions": sum( + 1 for row in scan_rows if row["dwarf_status"] == "mismatch" + ), + "error_functions": sum( + 1 for row in scan_rows if row["dwarf_status"] == "error" + ), + "byte_matched_dwarf_problems": sum( + 1 + for row in scan_rows + if row["objdiff_status"] == "match" + and row["dwarf_status"] in ("mismatch", 
"error") + ), + "signature_mismatch_functions": sum( + 1 for row in scan_rows if row.get("signature_match") is False + ), + } + + filtered_rows = filter_dwarf_scan_rows(scan_rows, args.dwarf_status) + filtered_rows = filter_dwarf_signature_rows( + filtered_rows, args.signature_status + ) + sort_dwarf_scan_rows(filtered_rows) + if args.limit is not None: + filtered_rows = filtered_rows[: args.limit] + + if args.json: + print( + json.dumps( + { + "unit": args.unit, + "summary": summary, + "rows": filtered_rows, + }, + indent=2, + ) + ) + return + + print( + f"Scanned {summary['scanned_functions']} function(s): " + f"{summary['exact_functions']} exact, " + f"{summary['mismatch_functions']} mismatched, " + f"{summary['error_functions']} errors." + ) + print( + "Byte-matched but DWARF-problem functions: " + f"{summary['byte_matched_dwarf_problems']}" + ) + print( + "Signature-mismatch functions: " + f"{summary['signature_mismatch_functions']}" + ) + + if not filtered_rows: + print("No functions match the given filters.") + return + + print() + print( + f"{'DSTAT':<8} {'DWARF':>7} {'SIG':>3} {'CHG':>4} {'OBJ':>7} {'OSTAT':<10} {'UNM':>6} FUNCTION" + ) + print("-" * 120) + for row in filtered_rows: + dwarf_percent = ( + f"{row['dwarf_match_percent']:.1f}%" + if row["dwarf_match_percent"] is not None + else "ERR" + ) + objdiff_percent = ( + f"{row['objdiff_match_percent']:.1f}%" + if row["objdiff_match_percent"] is not None + else "-" + ) + changed_groups = ( + str(row["changed_groups"]) if row["changed_groups"] is not None else "-" + ) + signature_state = ( + "yes" + if row.get("signature_match") is True + else "no" + if row.get("signature_match") is False + else "-" + ) + print( + f"{row['dwarf_status']:<8} {dwarf_percent:>7} {signature_state:>3} {changed_groups:>4} " + f"{objdiff_percent:>7} {row['objdiff_status']:<10} " + f"{row['unmatched_bytes_est']:>5}B {row['function']}" + ) + if args.show_errors and row.get("error"): + first_line = 
str(row["error"]).splitlines()[0] + print(f" error: {first_line}") + + print() + print( + "Tip: focus matched-byte functions first with " + "`python tools/decomp-workflow.py dwarf-scan " + f"-u {shlex.quote(args.unit)} --objdiff-status match`" + ) + if summary["signature_mismatch_functions"]: + print( + "Tip: add `--signature-status match` to focus body/local DWARF mismatches " + "instead of signature-only trouble." + ) + finally: + if cleanup_rebuilt_dwarf: + maybe_remove(rebuilt_dwarf_path) + + def lookup_symbol_address(symbols_file: str, mangled_name: str) -> Optional[str]: if not os.path.exists(symbols_file): return None @@ -395,6 +784,14 @@ def build_shared_unit_cached(unit: str) -> str: except WorkflowError as e: report(False, "ghidra", str(e)) + print_section("Platform Build Inputs") + for label, abs_path, missing_detail in PLATFORM_BUILD_REQUIREMENTS: + report( + os.path.exists(abs_path), + label, + describe_path(abs_path) if os.path.exists(abs_path) else missing_detail, + ) + print_section("Debug Symbol Checks") try: gc_addr = lookup_symbol_address(GC_SYMBOLS, DEBUG_SYMBOL_PROBE_MANGLED) @@ -642,6 +1039,9 @@ def command_function(args: argparse.Namespace) -> None: ensure_decomp_prereqs() print_section(f"Function Workflow: {args.function}") ensure_shared_unit_output(args.unit) + resolved_function_name = resolve_exact_function_name( + args.unit, args.function, reloc_diffs=args.reloc_diffs + ) cmd = python_tool("decomp-context.py", "-u", args.unit, "-f", args.function) if args.no_source: cmd.append("--no-source") @@ -661,9 +1061,14 @@ def command_function(args: argparse.Namespace) -> None: print(flush=True) print( "Required completion check: python tools/decomp-workflow.py verify " - f"-u {shlex.quote(args.unit)} -f {shlex.quote(args.function)}", + f"-u {shlex.quote(args.unit)} -f {shlex.quote(resolved_function_name)}", flush=True, ) + if resolved_function_name != args.function: + print( + f"(Resolved exact function name for DWARF-safe follow-up: 
{resolved_function_name})", + flush=True, + ) def command_unit(args: argparse.Namespace) -> None: @@ -810,8 +1215,11 @@ def command_dwarf(args: argparse.Namespace) -> None: print_section(f"DWARF Workflow: {args.unit} / {args.function}") if not args.rebuilt_dwarf_file: ensure_shared_unit_output(args.unit) + resolved_function_name = resolve_exact_function_name(args.unit, args.function) - cmd: List[str] = python_tool("dwarf-compare.py", "-u", args.unit, "-f", args.function) + cmd: List[str] = python_tool( + "dwarf-compare.py", "-u", args.unit, "-f", resolved_function_name + ) if args.summary: cmd.append("--summary") if args.json: @@ -833,18 +1241,24 @@ def command_verify(args: argparse.Namespace) -> None: ensure_shared_unit_output(args.unit) objdiff_row = choose_objdiff_row(args.unit, args.function, reloc_diffs=args.reloc_diffs) - dwarf_report = load_dwarf_report( - args.unit, - args.function, - rebuilt_dwarf_file=args.rebuilt_dwarf_file, - ) + resolved_function_name = str(objdiff_row["name"]) + dwarf_load_error: Optional[str] = None + dwarf_report: Optional[Dict[str, Any]] = None + try: + dwarf_report = load_dwarf_report( + args.unit, + resolved_function_name, + rebuilt_dwarf_file=args.rebuilt_dwarf_file, + ) + except WorkflowError as e: + dwarf_load_error = str(e) objdiff_exact = ( objdiff_row["status"] == "match" and objdiff_row["match_percent"] is not None and float(objdiff_row["match_percent"]) >= 100.0 ) - dwarf_exact = bool(dwarf_report["normalized_exact_match"]) + dwarf_exact = bool(dwarf_report["normalized_exact_match"]) if dwarf_report else False overall_ok = objdiff_exact and dwarf_exact objdiff_percent = ( @@ -852,34 +1266,56 @@ def command_verify(args: argparse.Namespace) -> None: if objdiff_row["match_percent"] is not None else "-" ) - dwarf_percent = f"{float(dwarf_report['match_percent']):.1f}%" + dwarf_percent = ( + f"{float(dwarf_report['match_percent']):.1f}%" if dwarf_report else "-" + ) print( f"objdiff: {'PASS' if objdiff_exact else 'FAIL'} | " 
f"{objdiff_percent} | status={objdiff_row['status']} | " f"unmatched~{objdiff_row['unmatched_bytes_est']}B" ) - print( - f"DWARF: {'PASS' if dwarf_exact else 'FAIL'} | " - f"{dwarf_percent} | normalized exact={'yes' if dwarf_exact else 'no'} | " - f"change groups={dwarf_report['changed_groups']}" - ) + if dwarf_report: + print( + f"DWARF: {'PASS' if dwarf_exact else 'FAIL'} | " + f"{dwarf_percent} | normalized exact={'yes' if dwarf_exact else 'no'} | " + f"change groups={dwarf_report['changed_groups']}" + ) + else: + print("DWARF: FAIL | unable to compare rebuilt vs original DWARF", flush=True) + if resolved_function_name != args.function: + print(f"Resolved DWARF symbol: {resolved_function_name}") print(f"Overall: {'PASS' if overall_ok else 'FAIL'}") print("Done means both objdiff and normalized DWARF are exact for the function.") if overall_ok: return + if dwarf_load_error: + print(flush=True) + print("DWARF compare could not complete:", flush=True) + print(dwarf_load_error, flush=True) + if ( + objdiff_row["status"] == "missing" + and "rebuilt DWARF: function" in dwarf_load_error + and "not found" in dwarf_load_error + ): + print( + "Hint: the rebuilt object does not contain this function yet. 
" + "Implement the function or fix its ownership/signature first, then rerun verify.", + flush=True, + ) + print(flush=True) print("Follow-up commands:", flush=True) print( f" python tools/decomp-workflow.py diff -u {shlex.quote(args.unit)} " - f"-d {shlex.quote(args.function)}", + f"-d {shlex.quote(resolved_function_name)}", flush=True, ) print( f" python tools/decomp-workflow.py dwarf -u {shlex.quote(args.unit)} " - f"-f {shlex.quote(args.function)}", + f"-f {shlex.quote(resolved_function_name)}", flush=True, ) raise WorkflowError( @@ -897,7 +1333,7 @@ def build_parser() -> argparse.ArgumentParser: health = subparsers.add_parser( "health", - help="Check whether the current worktree is ready for GC and PS2 decomp work", + help="Check whether the current worktree is ready for GC, Xbox, and PS2 work", ) health.add_argument( "--full", @@ -941,7 +1377,12 @@ def build_parser() -> argparse.ArgumentParser: help="Run decomp-context.py for one function", ) function.add_argument("-u", "--unit", required=True, help="Translation unit name") - function.add_argument("-f", "--function", required=True, help="Function name to inspect") + function.add_argument( + "-f", + "--function", + required=True, + help="Function name to inspect (full name or a unique substring)", + ) function.add_argument( "--no-source", action="store_true", @@ -1086,7 +1527,12 @@ def build_parser() -> argparse.ArgumentParser: help="Compare original vs rebuilt DWARF for one function", ) dwarf.add_argument("-u", "--unit", required=True, help="Translation unit name") - dwarf.add_argument("-f", "--function", required=True, help="Function name to compare") + dwarf.add_argument( + "-f", + "--function", + required=True, + help="Function name to compare (full name or a unique substring)", + ) dwarf.add_argument( "--summary", action="store_true", @@ -1122,12 +1568,72 @@ def build_parser() -> argparse.ArgumentParser: ) dwarf.set_defaults(func=command_dwarf) + dwarf_scan = subparsers.add_parser( + "dwarf-scan", + 
help="Scan one translation unit and rank per-function DWARF problem areas", + ) + dwarf_scan.add_argument("-u", "--unit", required=True, help="Translation unit name") + dwarf_scan.add_argument( + "--search", + help="Only include functions whose name or symbol contains this text", + ) + dwarf_scan.add_argument( + "--objdiff-status", + choices=["all", "match", "nonmatching", "missing"], + default="all", + help="Filter functions by objdiff status before scanning (default: all)", + ) + dwarf_scan.add_argument( + "--dwarf-status", + choices=["all", "problem", "exact", "mismatch", "error"], + default="problem", + help="Filter scan results by DWARF outcome after scanning (default: problem)", + ) + dwarf_scan.add_argument( + "--signature-status", + choices=["all", "match", "mismatch"], + default="all", + help="Filter scan results by whether the DWARF signature already matches (default: all)", + ) + dwarf_scan.add_argument( + "--limit", + type=int, + default=20, + help="Maximum rows to print after sorting (default: 20)", + ) + dwarf_scan.add_argument( + "--json", + action="store_true", + help="Print the scan summary and rows as JSON", + ) + dwarf_scan.add_argument( + "--show-errors", + action="store_true", + help="Print one-line error details under rows that could not be compared", + ) + dwarf_scan.add_argument( + "--reloc-diffs", + choices=RELOC_DIFF_CHOICES, + default="none", + help="Pass through objdiff relocation diff mode when loading unit symbols", + ) + dwarf_scan.add_argument( + "--rebuilt-dwarf-file", + help="Use an existing rebuilt DWARF dump instead of dumping the unit object", + ) + dwarf_scan.set_defaults(func=command_dwarf_scan) + verify = subparsers.add_parser( "verify", help="Fail unless one function matches in both objdiff and DWARF", ) verify.add_argument("-u", "--unit", required=True, help="Translation unit name") - verify.add_argument("-f", "--function", required=True, help="Function name to verify") + verify.add_argument( + "-f", + "--function", + 
required=True, + help="Function name to verify (full name or a unique substring)", + ) verify.add_argument( "--reloc-diffs", choices=RELOC_DIFF_CHOICES, diff --git a/tools/dwarf-compare.py b/tools/dwarf-compare.py index 50b2a6351..6c6837b0e 100644 --- a/tools/dwarf-compare.py +++ b/tools/dwarf-compare.py @@ -18,6 +18,7 @@ import json import os import re +import shlex import shutil import sqlite3 import sys @@ -153,6 +154,22 @@ def load_function_blocks( return split_functions(text) +def dump_has_processing_errors(path: str) -> bool: + try: + return "// ERROR: Failed to process tag " in read_text(path) + except OSError: + return False + + +def append_raw_tree_hint(message: str, unit_name: str, function_name: str) -> str: + hint = ( + "\nRaw .debug inspection may help:\n" + f" python tools/dwarf1_subroutine_tree.py -u {shlex.quote(unit_name)} " + f"-f {shlex.quote(function_name)}" + ) + return message + hint + + def find_function_blocks(funcs: Iterable[FunctionBlock], query: str) -> List[FunctionBlock]: candidates = _candidate_func_names(query) matches: List[FunctionBlock] = [] @@ -954,7 +971,14 @@ def main() -> None: ) original_block = choose_function_block(original_funcs, args.function, "original DWARF") - rebuilt_block = choose_function_block(rebuilt_funcs, args.function, "rebuilt DWARF") + try: + rebuilt_block = choose_function_block(rebuilt_funcs, args.function, "rebuilt DWARF") + except DwarfCompareError as exc: + if rebuilt_dwarf_path and dump_has_processing_errors(rebuilt_dwarf_path): + raise DwarfCompareError( + append_raw_tree_hint(str(exc), args.unit, args.function) + ) from exc + raise report = build_report( args.unit, diff --git a/tools/dwarf1_subroutine_tree.py b/tools/dwarf1_subroutine_tree.py new file mode 100644 index 000000000..099468b28 --- /dev/null +++ b/tools/dwarf1_subroutine_tree.py @@ -0,0 +1,935 @@ +#!/usr/bin/env python3 + +""" +Inspect raw MWCC DWARF 1.1 subroutine trees directly from .debug. 
+ +This is useful when `dtk dwarf dump` fails to emit a top-level function block, +but the raw DIE tree is still present in the rebuilt object. + +Examples: + python tools/dwarf1_subroutine_tree.py -u main/Speed/Indep/SourceLists/zAttribSys -f 'Attrib::Class::RemoveCollection(Attrib::Collection *)' + python tools/dwarf1_subroutine_tree.py build/GOWE69/src/Speed/Indep/SourceLists/zAttribSys.o --tag 0x2A2C8 + python tools/dwarf1_subroutine_tree.py -u main/Speed/Indep/SourceLists/zAttribSys -f 'Attrib::Database::RemoveClass(Attrib::Class const *)' --json + python tools/dwarf1_subroutine_tree.py -u main/Speed/Indep/SourceLists/zAttribSys -f 'Attrib::Class::RemoveCollection(Attrib::Collection *)' --compare-original + python tools/dwarf1_subroutine_tree.py -u main/Speed/Indep/SourceLists/zAttribSys -f 'Attrib::Class::RemoveCollection(Attrib::Collection *)' --show-non-subroutine + +The default tree intentionally compares by owner + bare name, which is great for fast +owner-drift checks but can hide overload-only mismatches. When an exact-text candidate +adds an overloaded inline helper or otherwise reuses the same base name with different +parameters/locals, use `--show-non-subroutine` (and `--show-mangled` when available) to +inspect the rebuilt formal-parameter/local rows directly before trusting a +`--compare-original` pass. 
+""" + +from __future__ import annotations + +import argparse +import difflib +import json +import re +from dataclasses import dataclass +from enum import IntEnum +from io import BytesIO +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional + +from elftools.elf.elffile import ELFFile +from elftools.elf.relocation import RelocationSection + +from _common import find_objdiff_unit, load_objdiff_config, make_abs +from lookup import split_functions + + +class TreeError(RuntimeError): + pass + + +class Endian: + def __init__(self, little: bool): + self.little = little + + def u16(self, data: bytes) -> int: + return int.from_bytes(data, "little" if self.little else "big") + + def u32(self, data: bytes) -> int: + return int.from_bytes(data, "little" if self.little else "big") + + def u64(self, data: bytes) -> int: + return int.from_bytes(data, "little" if self.little else "big") + + +class TagKind(IntEnum): + PADDING = 0x0000 + FORMAL_PARAMETER = 0x0005 + GLOBAL_SUBROUTINE = 0x0006 + LABEL = 0x000A + LEXICAL_BLOCK = 0x000B + LOCAL_VARIABLE = 0x000C + COMPILE_UNIT = 0x0011 + SUBROUTINE = 0x0014 + INLINED_SUBROUTINE = 0x001D + + +class AttributeKind(IntEnum): + SIBLING = 0x0010 | 0x2 + LOCATION = 0x0020 | 0x3 + NAME = 0x0030 | 0x8 + FUND_TYPE = 0x0050 | 0x5 + MOD_FUND_TYPE = 0x0060 | 0x3 + USER_DEF_TYPE = 0x0070 | 0x2 + MOD_UD_TYPE = 0x0080 | 0x3 + LOW_PC = 0x0110 | 0x1 + HIGH_PC = 0x0120 | 0x1 + MEMBER = 0x0140 | 0x2 + MW_MANGLED = 0x2000 | 0x8 + SPECIFICATION = 0x02B0 | 0x2 + + +class FormKind(IntEnum): + ADDR = 0x1 + REF = 0x2 + BLOCK2 = 0x3 + BLOCK4 = 0x4 + DATA2 = 0x5 + DATA4 = 0x6 + DATA8 = 0x7 + STRING = 0x8 + + +FORM_MASK = 0xF + + +@dataclass +class Attribute: + kind: int + value: object + + +@dataclass +class Tag: + key: int + kind: int + attributes: List[Attribute] + + +def unit_output_path(unit_name: str) -> str: + config = load_objdiff_config() + unit = find_objdiff_unit(config, unit_name) + if unit is None: + raise TreeError(f"Unit 
def normalize_query(name: str) -> str:
    """Return ``name`` without its parameter list or anything following it.

    Surrounding whitespace is removed and the text is cut at the first
    ``(``.  The one exception is the C++ call operator: when that first
    ``(`` is the opening parenthesis of ``operator()``, the parentheses are
    part of the function name, so the cut happens at the next ``(`` instead
    (or not at all when no parameter list follows).

    Args:
        name: A function query such as ``"Ns::Class::Method(int) const"``.

    Returns:
        The bare, possibly qualified, function name; ``""`` for blank input.
    """
    bare = name.strip()
    paren = bare.find("(")
    if paren != -1:
        # The look-behind keeps identifiers that merely end in "operator"
        # (e.g. ``myoperator()``) from being mistaken for the call operator.
        call_operator = re.search(r"(?<!\w)operator\s*\(\s*\)", bare)
        if (
            call_operator is not None
            and call_operator.start() < paren < call_operator.end()
        ):
            paren = bare.find("(", call_operator.end())
    if paren != -1:
        bare = bare[:paren]
    return bare.strip()
def read_attribute(f: BytesIO, endian: Endian) -> Attribute:
    """Decode one attribute from ``f``, starting at its current position.

    An attribute begins with a 16-bit code.  Its low four bits
    (``FORM_MASK``) select how the value is encoded; the complete code is
    stored unchanged as ``Attribute.kind``.

    Args:
        f: Stream positioned at the attribute's 16-bit code.
        endian: Byte-order helper used for every fixed-width field.

    Returns:
        The decoded attribute.  ``value`` is an ``int`` for the address,
        reference and fixed-width data forms, ``bytes`` for the two block
        forms, and ``str`` for the string form.

    Raises:
        TreeError: If the form bits do not name a known ``FormKind``.
    """
    kind = endian.u16(f.read(2))
    form_bits = kind & FORM_MASK
    try:
        form = FormKind(form_bits)
    except ValueError as exc:
        # FormKind() rejects unknown codes with ValueError, which the script's
        # ``except TreeError`` handler would let through as a traceback.
        raise TreeError(
            f"Unhandled attribute form 0x{form_bits:X} in attribute 0x{kind:04X}"
        ) from exc

    if form in (FormKind.ADDR, FormKind.REF, FormKind.DATA4):
        value = endian.u32(f.read(4))
    elif form == FormKind.DATA2:
        value = endian.u16(f.read(2))
    elif form == FormKind.DATA8:
        value = endian.u64(f.read(8))
    elif form == FormKind.BLOCK2:
        # Block forms carry a length prefix followed by that many raw bytes.
        size = endian.u16(f.read(2))
        value = f.read(size)
    elif form == FormKind.BLOCK4:
        size = endian.u32(f.read(4))
        value = f.read(size)
    elif form == FormKind.STRING:
        value = read_string(f)
    else:
        # Only reachable if FormKind gains a member without a branch here.
        raise TreeError(f"Unhandled attribute form {form}")
    return Attribute(kind=kind, value=value)
def child_keys(tags: Dict[int, Tag], keys: List[int], key: int) -> List[int]:
    """Return the offsets of the direct children of the tag at ``key``.

    The children are the tags between ``key`` and its next sibling.  The walk
    starts at the entry that follows ``key`` in ``keys`` and hops from
    sibling to sibling until it reaches the parent's own next sibling (or
    runs out of tags).  Padding entries are stepped over but not reported.

    Args:
        tags: All parsed tags, keyed by offset.
        keys: The tag offsets in the order used for "next tag" lookups.
        key: Offset of the parent tag; must be present in ``keys``.

    Returns:
        Child offsets in traversal order, without padding entries.

    Raises:
        TreeError: If the sibling chain leaves the set of known tag offsets
            or fails to move forward, which would otherwise end in a bare
            ``KeyError`` or an endless loop on malformed debug data.
    """
    sibling = next_sibling_key(tags, keys, key)
    index = keys.index(key)
    current = keys[index + 1] if index + 1 < len(keys) else None
    out: List[int] = []
    while current is not None and current != sibling:
        child = tags.get(current)
        if child is None:
            raise TreeError(
                f"Sibling chain under tag 0x{key:X} points at 0x{current:X}, "
                "which is not a tag offset"
            )
        if child.kind != TagKind.PADDING:
            out.append(current)
        following = next_sibling_key(tags, keys, current)
        if following is not None and following <= current:
            # A sibling reference that does not advance can never reach the
            # parent's sibling, so stop instead of looping forever.
            raise TreeError(
                f"Sibling chain under tag 0x{key:X} does not advance past "
                f"0x{current:X} (next is 0x{following:X})"
            )
        current = following
    return out
def choose_subroutine(tags: Dict[int, Tag], query: Optional[str], tag_offset: Optional[int]) -> Tag:
    """Pick the top-level subroutine tag selected by ``--tag`` or ``--function``.

    An explicit ``tag_offset`` wins and is only checked for being a
    subroutine tag.  Otherwise ``query`` is matched against the qualified
    name of every top-level subroutine, trying three tiers in order and
    using the first one that yields any result:

    1. the qualified name equals the parameter-less query, or ends with
       ``::`` followed by it (so ``Foo::Bar`` is not confused with
       ``Foo::BarBaz``);
    2. the raw query text occurs somewhere in the qualified name;
    3. any progressively less-qualified form of the query occurs in it.

    Args:
        tags: All parsed tags, keyed by offset.
        query: Function name from the command line, or ``None``.
        tag_offset: Explicit tag offset from the command line, or ``None``.

    Returns:
        The single matching subroutine tag.

    Raises:
        TreeError: If the offset is unknown or not a subroutine, if neither
            selector is given, or if the query matches zero or several
            functions.
    """
    if tag_offset is not None:
        tag = tags.get(tag_offset)
        if tag is None:
            raise TreeError(f"No tag at offset 0x{tag_offset:X}")
        if tag.kind not in (TagKind.GLOBAL_SUBROUTINE, TagKind.SUBROUTINE):
            raise TreeError(
                f"Tag 0x{tag_offset:X} is {tag_name(tag.kind)}, not a top-level subroutine"
            )
        return tag

    if not query:
        raise TreeError("Either --function or --tag is required")

    # The candidate list depends only on the query, so build it once rather
    # than once per subroutine.
    candidates = candidate_names(query)
    # The first candidate is the fully qualified, parameter-less query.
    normalized = candidates[0] if candidates else ""

    precise: List[Tag] = []
    exact: List[Tag] = []
    matches: List[Tag] = []
    for tag in top_level_subroutines(tags):
        name = qualified_name(tags, tag)
        if normalized and (name == normalized or name.endswith("::" + normalized)):
            precise.append(tag)
        if query in name:
            exact.append(tag)
        if any(candidate in name for candidate in candidates):
            matches.append(tag)

    selected = precise or exact or matches
    if not selected:
        raise TreeError(f"Top-level raw subroutine '{query}' not found")
    if len(selected) > 1:
        # Cap the preview so a very loose query does not flood the terminal.
        names = "\n".join(f" - {qualified_name(tags, tag)}" for tag in selected[:12])
        raise TreeError(
            f"Raw subroutine query '{query}' matched multiple functions.\n{names}"
        )
    return selected[0]
def format_range(low_pc: Optional[int], high_pc: Optional[int]) -> str:
    """Render an address range as ``LOW-HIGH`` in upper-case hexadecimal.

    Each bound is zero-padded to at least four digits.  When either bound is
    missing, a fixed dash placeholder is returned instead.
    """
    have_both_bounds = low_pc is not None and high_pc is not None
    if have_both_bounds:
        return "-".join(format(bound, "04X") for bound in (low_pc, high_pc))
    return "-------- --------"
def find_original_function_block(query: str) -> tuple[str, str, str, str]:
    """Find the one function in the original DWARF dump that matches ``query``.

    The dump is read from ``symbols/Dwarf/functions.nothpp`` and split with
    ``split_functions``; the third element of each resulting tuple is
    treated as the signature line.  Functions whose signature contains the
    raw query are preferred; if there are none, functions whose signature
    contains any form produced by ``candidate_names`` are used instead.

    Raises:
        TreeError: If the dump cannot be read, or if the query matches no
            function or more than one.
    """
    relative_path = "symbols/Dwarf/functions.nothpp"
    dwarf_path = Path(make_abs(relative_path) or relative_path)
    try:
        text = dwarf_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        raise TreeError(f"Failed to read original DWARF dump: {dwarf_path}") from exc

    # The candidate forms depend only on the query, so compute them up front.
    name_candidates = candidate_names(query)
    literal_hits: List[tuple[str, str, str, str]] = []
    candidate_hits: List[tuple[str, str, str, str]] = []
    for block in split_functions(text):
        signature = block[2]
        if query in signature:
            literal_hits.append(block)
        for candidate in name_candidates:
            if candidate in signature:
                candidate_hits.append(block)
                break

    chosen = literal_hits if literal_hits else candidate_hits
    if not chosen:
        raise TreeError(f"Original DWARF function '{query}' not found in {dwarf_path}")
    if len(chosen) > 1:
        # Only the first dozen signatures are listed to keep the error short.
        preview = "\n".join(f" - {func[2]}" for func in chosen[:12])
        raise TreeError(
            f"Original DWARF query '{query}' matched multiple functions.\n{preview}"
        )
    return chosen[0]
def extract_decl_name(line: str) -> Optional[str]:
    """Extract the declared identifier from one line of C/C++-like text.

    Comments are removed first.  A trailing ``{``, an initializer
    (everything from the first ``=``) and a trailing ``;`` are then dropped,
    and the last identifier, optionally followed by one ``[...]`` array
    suffix, is taken as the name.

    Returns:
        The identifier, or ``None`` when the line is empty after comment
        removal, starts an unterminated block comment, ends in ``{}``, does
        not end in an identifier, or ends in a bare qualifier/keyword such
        as ``const`` or ``struct``.
    """
    without_block_comments = re.sub(r"/\*.*?\*/", "", line)
    text = without_block_comments.partition("//")[0].strip()
    if text == "" or text.startswith("/*") or text.endswith("{}"):
        return None

    if text.endswith("{"):
        text = text[:-1].rstrip()
    # Anything from the first "=" onwards is an initializer, not the name.
    text = text.partition("=")[0].rstrip() if "=" in text else text
    if text.endswith(";"):
        text = text[:-1].rstrip()

    found = re.search(r"([A-Za-z_]\w*)\s*(?:\[[^\]]*\])?$", text)
    if found is None:
        return None
    identifier = found.group(1)
    not_a_name = {"const", "volatile", "unsigned", "signed", "struct", "class"}
    return None if identifier in not_a_name else identifier
True + + paren = line.find("(") + close = line.rfind(")") + if paren == -1 or close == -1 or close < paren: + return [] + + names: List[str] = [] + if not is_static and "::" in line[:paren]: + names.append("this") + + params_text = line[paren + 1 : close].strip() + if not params_text or params_text == "void": + return names + + for part in split_top_level_commas(params_text): + name = extract_decl_name(part) + if name: + names.append(name) + return names + + +def original_rows_from_block( + block: tuple[str, str, str, str], + function_label: str, + *, + max_depth: Optional[int] = None, + include_non_subroutine: bool = False, +) -> List[Dict[str, Any]]: + rows: List[Dict[str, Any]] = [ + { + "tag_kind": "ORIGINAL_FUNCTION", + "depth": 0, + "label": function_label, + "low_pc": int(block[0], 16), + "high_pc": int(block[1], 16), + } + ] + if include_non_subroutine: + for name in original_param_names(block[2]): + rows.append( + { + "tag_kind": "ORIGINAL_PARAM", + "depth": 1, + "label": name, + "low_pc": int(block[0], 16), + "high_pc": int(block[1], 16), + } + ) + + lines = block[3].splitlines() + range_re = re.compile( + r"^(\s*)// Range:\s+0x([0-9A-Fa-f]+)\s*->\s*0x([0-9A-Fa-f]+)\s*$" + ) + for index, line in enumerate(lines): + if include_non_subroutine and line.strip() == "// Local variables": + depth = len(line) - len(line.lstrip(" ")) + local_depth = depth // 4 + lookahead = index + 1 + while lookahead < len(lines): + candidate = lines[lookahead].strip() + if not candidate or candidate.startswith("// Range:"): + break + if candidate.startswith("//"): + lookahead += 1 + continue + name = extract_decl_name(candidate) + if name: + rows.append( + { + "tag_kind": "ORIGINAL_LOCAL", + "depth": local_depth, + "label": name, + "low_pc": 0, + "high_pc": 0, + } + ) + lookahead += 1 + + match = range_re.match(line) + if not match: + continue + + sig_line = "" + lookahead = index + 1 + while lookahead < len(lines): + candidate = lines[lookahead].strip() + if candidate: + if 
def compare_rows(
    original_rows: List[Dict[str, Any]], rebuilt_rows: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Diff two row lists by their ``(depth, label)`` pairs.

    The rows are aligned with ``difflib.SequenceMatcher`` (automatic junk
    detection disabled).  Every non-``equal`` opcode becomes one mismatch
    entry holding the opcode name, the index ranges on both sides, and a
    trimmed copy (depth, label, low_pc, high_pc) of the affected rows.

    Returns:
        A dict with ``original_count``, ``rebuilt_count``,
        ``mismatch_count`` and the ``mismatches`` list.
    """

    def identity(row: Dict[str, Any]) -> tuple:
        # Address ranges are deliberately excluded from the comparison key.
        return (row["depth"], row["label"])

    def trimmed(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        return [
            {
                "depth": row["depth"],
                "label": row["label"],
                "low_pc": row["low_pc"],
                "high_pc": row["high_pc"],
            }
            for row in rows
        ]

    aligner = difflib.SequenceMatcher(
        a=[identity(row) for row in original_rows],
        b=[identity(row) for row in rebuilt_rows],
        autojunk=False,
    )
    mismatches: List[Dict[str, Any]] = [
        {
            "tag": opcode,
            "original_range": [orig_start, orig_end],
            "rebuilt_range": [new_start, new_end],
            "original": trimmed(original_rows[orig_start:orig_end]),
            "rebuilt": trimmed(rebuilt_rows[new_start:new_end]),
        }
        for opcode, orig_start, orig_end, new_start, new_end in aligner.get_opcodes()
        if opcode != "equal"
    ]

    return {
        "original_count": len(original_rows),
        "rebuilt_count": len(rebuilt_rows),
        "mismatch_count": len(mismatches),
        "mismatches": mismatches,
    }
def main() -> None:
    """Run the command-line tool: load an object, pick a function, print its tree.

    The object comes from the positional path or, failing that, from the
    objdiff unit named with ``--unit``.  Its ``.debug`` section is relocated
    and parsed, the requested subroutine is selected, and the resulting rows
    are printed as text or JSON.  With ``--compare-original`` the rows are
    also diffed against the original DWARF dump for the same function.

    Raises:
        TreeError: For every user-facing failure, including a missing object
            selector, a missing object file, a malformed ``--tag`` value, or
            ``--compare-original`` without ``--function``.
    """
    args = parse_args()
    if args.object:
        obj_path = Path(args.object)
    elif args.unit:
        obj_path = Path(unit_output_path(args.unit))
    else:
        raise TreeError("Either an object path or --unit is required")

    if not obj_path.is_file():
        raise TreeError(f"Object file not found: {obj_path}")

    tag_offset = None
    if args.tag:
        try:
            # Base 0 accepts both "0x..." hexadecimal and plain decimal input.
            tag_offset = int(args.tag, 0)
        except ValueError as exc:
            # Report bad input like every other usage error instead of
            # letting a traceback escape the TreeError handler.
            raise TreeError(
                f"Invalid --tag value (expected hex like 0x2A2C8 or decimal): {args.tag}"
            ) from exc

    with obj_path.open("rb") as f:
        elf = ELFFile(f)
        data = apply_debug_relocations(elf)
        tags = read_tags(data, Endian(elf.little_endian))

    tag = choose_subroutine(tags, args.function, tag_offset)
    rows = render_tree(
        tags,
        tag,
        max_depth=args.max_depth,
        include_non_subroutine=args.show_non_subroutine,
    )
    compare: Optional[Dict[str, Any]] = None
    if args.compare_original:
        # The original dump is searched by name, so --tag alone is not enough.
        if not args.function:
            raise TreeError("--compare-original requires --function")
        original_block = find_original_function_block(args.function)
        original_rows = original_rows_from_block(
            original_block,
            qualified_name(tags, tag),
            max_depth=args.max_depth,
            include_non_subroutine=args.show_non_subroutine,
        )
        compare = compare_rows(original_rows, rows)

    if args.json:
        payload = {
            "object": str(obj_path),
            "selected_tag": tag.key,
            "selected_name": qualified_name(tags, tag),
            "rows": rows,
        }
        if compare is not None:
            payload["compare_original"] = compare
        print(json.dumps(payload, indent=2))
    else:
        print(f"Object: {obj_path}")
        print(f"Tag: 0x{tag.key:X}")
        print(f"Function: {qualified_name(tags, tag)}")
        if args.show_mangled:
            mangled = resolved_mangled_name(tags, tag)
            if mangled:
                print(f"Mangled: {mangled}")
        low_pc, high_pc = tag_range(tag)
        print(f"Range: {format_range(low_pc, high_pc)}")
        print()
        print_tree(rows, show_mangled=args.show_mangled)
        if compare is not None:
            print_comparison(compare)
+ +Examples: + python tools/prodg_dump.py command -u main/Speed/Indep/SourceLists/zAttribSys + python tools/prodg_dump.py dump -u main/Speed/Indep/SourceLists/zAttribSys -o /tmp/zattrib_base + python tools/prodg_dump.py extract /tmp/zattrib_base --stage lreg \ + -f 'VecHashMap::UpdateSearchLength' + python tools/prodg_dump.py trace /tmp/zattrib_base --stage greg \ + -f 'void Attrib::Database::RemoveClass(const Attrib::Class *)' --pseudo 318,319 + python tools/prodg_dump.py diff /tmp/zattrib_base /tmp/zattrib_trial \ + -f 'VecHashMap::UpdateSearchLength' + build/tools/dtk elf disasm build/GOWE69/src/Speed/Indep/SourceLists/zAttribSys.o /tmp/zattrib_objdisasm.txt + python tools/prodg_dump.py diff /tmp/zattrib_oldfloor_objdisasm.txt /tmp/zattrib_objdisasm.txt \ + -f 'RemoveCollection__Q26Attrib5ClassPQ26Attrib10Collection' +""" + +import argparse +import difflib +import os +import re +import shlex +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Sequence + +from _common import ( + ROOT_DIR, + ToolError, + ensure_project_prereqs, + find_objdiff_unit, + format_subprocess_error, + load_objdiff_config, + make_abs, +) + + +FUNCTION_HEADER_RE = re.compile(r"^;; Function (.+)$") +REGISTER_PREF_RE = re.compile(r"^Register (\d+) used .*; pref (.+)$") +REGISTER_ASSIGN_RE = re.compile(r"^;; Register (\d+) in ([^.]+)\.$") +REGISTER_CONFLICT_RE = re.compile(r"^;; (\d+) conflicts: (.+)$") +REGISTER_PREFERENCES_RE = re.compile(r"^;; (\d+) preferences: (.+)$") +REGISTER_DISPOSITION_PAIR_RE = re.compile(r"(\d+)\s+in\s+([^\s]+)") +USER_PSEUDO_RE = re.compile( + r"\(reg/(?P[a-z/]+):(?P[A-Za-z0-9_]+) (?P\d+)(?: r(?P\d+))?\)" +) +HARD_REG_RE = re.compile( + r"\(reg(?:/[a-z/]+)?:(?P[A-Za-z0-9_]+) (?P\d+) r(?P\d+)\)" +) +PLAIN_REG_RE = re.compile(r"\(reg:(?P[A-Za-z0-9_]+) (?P\d+)\)") +FRAME_SLOT_PLUS_RE = re.compile( + r"mem/f:[A-Za-z0-9_]+ \(\s*plus:[A-Za-z0-9_]+ \(\s*reg:SI (?P\d+)\)\s*" + 
def print_section(title: str) -> None:
    """Print a banner for ``title`` to standard output and flush it.

    The banner is an empty line, a rule of 60 ``=`` characters, the title
    preceded by a space, and a second rule.
    """
    rule = "=" * 60
    # One call with a newline separator emits the same four lines.
    print("", rule, f" {title}", rule, sep="\n", flush=True)
def split_shell_pipeline(shell_line: str) -> str:
    """Return the first command of an ``&&``-chained shell line.

    Everything from the first ``&&`` onwards is discarded and the remainder
    is stripped of surrounding whitespace.  A line without ``&&`` is simply
    stripped.  The split is purely textual and does not honour quoting.
    """
    first_command, _separator, _rest = shell_line.partition("&&")
    return first_command.strip()
def derive_preprocess_command(info: UnitCompileInfo, ii_path: Path) -> List[str]:
    """Turn the unit's compile command into a preprocess-only command.

    Everything up to and including the compiler driver
    (``info.compile_argv[: info.ngccc_index + 1]``) is kept as is.  From the
    driver's arguments, the compile-only flag, the dependency-generation
    flags and the original output are removed, and ``-E -o <ii_path>`` is
    appended so the driver writes preprocessed source to ``ii_path``.

    The value-taking dependency options ``-MF``/``-MT``/``-MQ`` are removed
    together with their argument.  They only make sense alongside the
    ``-MD``/``-MMD`` flags stripped here, and ``derive_cc1plus_args`` drops
    them the same way.

    Args:
        info: Compile information for the unit; only ``compile_argv`` and
            ``ngccc_index`` are read, and ``compile_argv`` is not modified.
        ii_path: Destination for the preprocessed output.

    Returns:
        The complete argv for the preprocessing run.
    """
    compile_argv = list(info.compile_argv)
    driver_end = info.ngccc_index + 1

    standalone_flags = {"-MMD", "-MD", "-c"}
    flags_with_value = {"-o", "-MF", "-MT", "-MQ"}

    kept: List[str] = []
    remaining = iter(compile_argv[driver_end:])
    for arg in remaining:
        if arg in standalone_flags:
            continue
        if arg in flags_with_value:
            # Also consume the flag's value; tolerate a dangling final flag.
            next(remaining, None)
            continue
        kept.append(arg)

    return [*compile_argv[:driver_end], *kept, "-E", "-o", str(ii_path)]
def parse_stages(value: str) -> List[str]:
    """Split a comma-separated stage list into validated stage names.

    Each entry is stripped of surrounding whitespace and empty entries are
    ignored.  A stage name may contain only ASCII letters, digits, ``_``,
    ``+`` and ``-``.

    Raises:
        DumpToolError: If no stage remains after splitting, or on the first
            stage name that contains any other character.
    """
    stages: List[str] = []
    for piece in value.split(","):
        trimmed = piece.strip()
        if trimmed:
            stages.append(trimmed)
    if not stages:
        raise DumpToolError("Stage list must not be empty")

    first_invalid = next(
        (name for name in stages if re.fullmatch(r"[A-Za-z0-9_+-]+", name) is None),
        None,
    )
    if first_invalid is not None:
        raise DumpToolError(f"Invalid stage name: {first_invalid}")
    return stages
def load_dtk_disasm_function_blocks(lines: Sequence[str]) -> List[FunctionBlock]:
    """Group disassembly lines into one block per opened-and-closed function.

    A block opens on a line matching ``DTK_FN_RE`` (its first group becomes
    the header) and closes on a line matching ``DTK_ENDFN_RE`` whose first
    group equals that header.  Both delimiter lines are part of the block.
    ``start_line`` is the 1-based index of the opening line.

    A block that is re-opened or never closed is discarded.
    """
    found: List[FunctionBlock] = []
    open_name: Optional[str] = None
    open_start = 0
    body: List[str] = []

    for line_no, text in enumerate(lines, start=1):
        opener = DTK_FN_RE.match(text)
        if opener is not None:
            # A new opener always restarts collection, dropping anything pending.
            open_name, open_start, body = opener.group(1), line_no, [text]
            continue

        if open_name is None:
            continue

        body.append(text)
        closer = DTK_ENDFN_RE.match(text)
        if closer is None or closer.group(1) != open_name:
            continue

        found.append(FunctionBlock(header=open_name, start_line=open_start, lines=body))
        open_name, open_start, body = None, 0, []

    return found
def replace_asm_labels(text: str, label_map: Dict[str, str]) -> str:
    """Rewrite every mapped label occurring in *text* with its replacement.

    The substitution is done in a single pass, so text produced by one
    replacement is never rewritten by another.  A label is only replaced when
    it is not immediately followed by another identifier character; a mapped
    ``.L1`` therefore leaves an unmapped ``.L12`` untouched instead of turning
    it into a bogus ``.L_12``-style reference.

    Args:
        text: One line (or any fragment) of assembly text.
        label_map: Original label -> normalized label.

    Returns:
        The text with mapped labels replaced; *text* itself when nothing
        can be replaced.
    """
    labels = [label for label in label_map if label]
    if not labels:
        return text

    # Longest first so the alternation prefers ".L12" over its prefix ".L1".
    labels.sort(key=len, reverse=True)
    alternatives = "|".join(re.escape(label) for label in labels)
    pattern = re.compile(f"(?:{alternatives})(?![A-Za-z0-9_$])")
    return pattern.sub(lambda match: label_map[match.group(0)], text)
def choose_block(blocks: Sequence[FunctionBlock], query: str, exact: bool) -> FunctionBlock:
    """Return the single block whose header matches *query*.

    With ``exact`` the header must equal the query.  Otherwise any header
    containing the query matches; equality and suffix matches are special
    cases of containment, so one substring test covers all three.

    Raises:
        DumpToolError: If no block matches, or if more than one does.  The
            ambiguity message lists at most ten candidate headers.
    """

    def is_hit(header: str) -> bool:
        return header == query if exact else query in header

    hits = [candidate for candidate in blocks if is_hit(candidate.header)]

    if len(hits) == 1:
        return hits[0]
    if not hits:
        raise DumpToolError(f"Function not found in dump: {query}")

    shown = hits[:10]
    listing = "\n".join(f" - {candidate.header}" for candidate in shown)
    hidden = len(hits) - len(shown)
    suffix = f"\n ... (+{hidden} more)" if hidden > 0 else ""
    raise DumpToolError(
        f"Function query matched multiple dump blocks: {query}\n{listing}{suffix}"
    )
def parse_int_list(values: Sequence[str]) -> List[int]:
    """Flatten repeatable, comma-separated CLI values into unique integers.

    Each value may hold several comma-separated numbers; blank items are
    ignored.  Duplicates are dropped and first-seen order is kept.

    Raises:
        DumpToolError: If an item is not a base-10 integer.
    """
    # A dict keeps insertion order, so its keys double as an ordered set.
    ordered = {}
    for value in values:
        for piece in value.split(","):
            token = piece.strip()
            if token == "":
                continue
            try:
                number = int(token, 10)
            except ValueError as e:
                raise DumpToolError(f"Invalid register number: {token}") from e
            ordered.setdefault(number, None)
    return list(ordered)
def read_paren_expr(text: str, start_index: int) -> tuple[str, int]:
    """Read one balanced parenthesized expression from *text*.

    Whitespace after ``start_index`` is skipped, and the next character must
    be ``(``.

    Returns:
        ``(expression, next_index)``: the expression including its outer
        parentheses, and the index just past its closing parenthesis.

    Raises:
        DumpToolError: If no ``(`` follows the skipped whitespace, or if the
            parentheses never balance.
    """
    length = len(text)

    open_at = start_index
    while open_at < length and text[open_at].isspace():
        open_at += 1
    if open_at >= length or text[open_at] != "(":
        raise DumpToolError(f"Expected '(' while parsing compare expression: {text[start_index:]}")

    nesting = 0
    for pos in range(open_at, length):
        char = text[pos]
        if char == "(":
            nesting += 1
        elif char == ")":
            nesting -= 1
            if nesting == 0:
                return text[open_at : pos + 1], pos + 1

    raise DumpToolError(f"Unbalanced compare expression: {text[start_index:]}")
def parse_compare_signatures(entry_text: str) -> List[tuple[str, str, str]]:
    """Collect a ``(kind, left, right)`` signature for every compare in an entry.

    The entry is first flattened to single-space separated text.  For each
    ``compare:`` marker, the kind is the text up to the next whitespace.  The
    two parenthesized operands that follow are read with ``read_paren_expr``
    and reduced with ``simplify_compare_operand``.  Scanning resumes after the
    right operand.

    Raises:
        DumpToolError: Propagated from ``read_paren_expr`` when an operand
            cannot be read.
    """
    marker = "compare:"
    flattened = " ".join(entry_text.split())
    found: List[tuple[str, str, str]] = []

    cursor = flattened.find(marker)
    while cursor >= 0:
        kind_start = cursor + len(marker)
        kind_end = kind_start
        while kind_end < len(flattened) and not flattened[kind_end].isspace():
            kind_end += 1

        lhs, after_lhs = read_paren_expr(flattened, kind_end)
        rhs, after_rhs = read_paren_expr(flattened, after_lhs)
        found.append(
            (
                flattened[kind_start:kind_end],
                simplify_compare_operand(lhs),
                simplify_compare_operand(rhs),
            )
        )
        cursor = flattened.find(marker, after_rhs)

    return found
def print_stage_summary(block: FunctionBlock) -> None:
    """Print the reference summary of one function block to stdout.

    The summary comes from ``summarize_block``.  Each non-empty category
    (user pseudos, hard registers, frame slots, compare signatures) is listed
    in sorted key order with its reference count and line range.  When every
    category is empty, a single notice is printed instead.
    """
    summary = summarize_block(block)

    def emit(text: str) -> None:
        print(text, flush=True)

    if not (summary.pseudos or summary.hard_regs or summary.frame_slots or summary.compares):
        emit("No summary data found.")
        return

    if summary.pseudos:
        emit("User pseudos:")
        for key in sorted(summary.pseudos):
            reg_num, flags, mode = key
            stats = summary.pseudos[key]
            emit(
                f" - r{reg_num} ({flags}:{mode}): {stats.count} refs "
                f"[lines {format_line_range(stats)}]"
            )

    if summary.hard_regs:
        emit("Hard registers:")
        for reg_num in sorted(summary.hard_regs):
            stats = summary.hard_regs[reg_num]
            emit(f" - r{reg_num}: {stats.count} refs [lines {format_line_range(stats)}]")

    if summary.frame_slots:
        emit("Frame slots (mem/f):")
        for key in sorted(summary.frame_slots):
            base, offset = key
            stats = summary.frame_slots[key]
            emit(
                f" - base r{base}{format_offset(offset)}: {stats.count} refs "
                f"[lines {format_line_range(stats)}]"
            )

    if summary.compares:
        emit("Compare signatures:")
        for key in sorted(summary.compares):
            kind, left_operand, right_operand = key
            stats = summary.compares[key]
            emit(
                f" - {kind}: {left_operand} vs {right_operand}: {stats.count} refs "
                f"[lines {format_line_range(stats)}]"
            )
def command_trace(args: argparse.Namespace) -> None:
    """Handle the ``trace`` subcommand.

    Resolves the dump file and function block, prints what the block records
    about each requested pseudo and hard register, then prints the block
    entries that mention any of them (at most ``args.limit`` entries when the
    limit is positive).

    Raises:
        DumpToolError: If neither ``--pseudo`` nor ``--hard-reg`` supplied a
            value, or if a helper fails to resolve the dump or function.
    """
    stage_file = resolve_stage_file(args.path, args.stage, args.base_name)
    target = choose_block(load_function_blocks(stage_file), args.function, exact=args.exact)
    pseudos = parse_int_list(args.pseudo)
    hard_regs = parse_int_list(args.hard_reg)
    if not (pseudos or hard_regs):
        raise DumpToolError("Trace requires at least one --pseudo or --hard-reg value")

    assignments = parse_register_assignments(target)
    preferences = parse_register_preferences(target)
    conflicts = parse_register_conflicts(target)

    # Only homes that parse as a decimal integer can be followed as hard registers.
    pseudo_homes: Dict[int, int] = {}
    for reg_num in pseudos:
        home = assignments.get(reg_num)
        if home is None:
            continue
        try:
            pseudo_homes[reg_num] = int(home, 10)
        except ValueError:
            pass

    def emit(text: str = "") -> None:
        print(text, flush=True)

    print_section(f"{stage_file.name}: {target.header}")

    if pseudos:
        emit("Pseudo summaries:")
        for reg_num in pseudos:
            emit(f" - pseudo {reg_num}: home {assignments.get(reg_num, '')}")
            if reg_num in preferences:
                emit(f" preferences: {preferences[reg_num]}")
            if reg_num in conflicts:
                emit(f" conflicts: {conflicts[reg_num]}")

    if hard_regs:
        emit("Hard-register traces:")
        for hard_reg in hard_regs:
            wanted = str(hard_reg)
            homed = sorted(pseudo for pseudo, home in assignments.items() if home == wanted)
            homed_text = ", ".join(map(str, homed)) if homed else ""
            emit(f" - hard r{hard_reg}: pseudos {homed_text}")

    hits: List[tuple[int, str, List[str]]] = [
        (start_line, entry_text, tags)
        for start_line, entry_text in iter_block_entries(target)
        if (tags := collect_entry_trace_tags(entry_text, pseudos, hard_regs, pseudo_homes))
    ]

    emit()
    if not hits:
        emit("No matching entries.")
        return

    total = len(hits)
    # A missing, zero or negative limit means "show everything".
    limit = args.limit if args.limit and args.limit > 0 else total
    emit(f"Matching entries ({min(total, limit)}/{total}):")
    for start_line, entry_text, tags in hits[:limit]:
        end_line = start_line + len(entry_text.splitlines()) - 1
        emit()
        emit(f"- lines {start_line}-{end_line} [{', '.join(tags)}]")
        emit(format_entry_text(entry_text, start_line, line_numbers=args.line_numbers))

    if limit < total:
        emit()
        emit(f"... truncated {total - limit} additional entries")
def command_dump(args: argparse.Namespace) -> None:
    """Handle the ``dump`` subcommand.

    Preprocesses the unit into ``<out-dir>/<base>.ii``, then runs the derived
    ``cc1plus`` command from ``ROOT_DIR``.  Its captured stdout/stderr go to
    ``<base>.log``, which is written even when ``cc1plus`` fails.  Finally the
    files in the output directory that share the base name are listed.

    Raises:
        DumpToolError: If ``cc1plus`` exits non-zero, or if a helper rejects
            the unit or the output directory.
    """
    info = resolve_compile_info(args.unit)
    out_dir = Path(args.out_dir).resolve()
    ensure_output_dir(out_dir, force=args.force)

    stem = args.base_name or derived_base_name(info)
    ii_path = out_dir / f"{stem}.ii"
    asm_path = out_dir / f"{stem}.s"
    log_path = out_dir / f"{stem}.log"
    dumpbase = out_dir / stem

    preprocess_cmd = derive_preprocess_command(info, ii_path)
    cc1plus_cmd = derive_cc1plus_command(info, ii_path, dumpbase, asm_path)

    print_section(f"Dumping ProDG state: {args.unit}")
    print(f"Output directory: {out_dir}", flush=True)
    if args.print_commands:
        for title, argv in (("preprocess:", preprocess_cmd), ("cc1plus:", cc1plus_cmd)):
            print(flush=True)
            print(title, flush=True)
            print(shlex.join(argv), flush=True)
        print(flush=True)

    run_stream(preprocess_cmd)
    result = subprocess.run(cc1plus_cmd, cwd=ROOT_DIR, text=True, capture_output=True)

    # Keep whichever streams produced output, stdout first.
    captured = [stream for stream in (result.stdout, result.stderr) if stream]
    log_path.write_text("\n".join(captured))

    if result.returncode != 0:
        raise DumpToolError(
            format_subprocess_error(cc1plus_cmd, result.returncode, result.stdout, result.stderr)
        )

    print("Generated files:", flush=True)
    for name in sorted(produced.name for produced in out_dir.glob(f"{stem}.*")):
        print(f" - {name}", flush=True)
    print(f"Log: {log_path}", flush=True)
def command_summary(args: argparse.Namespace) -> None:
    """Handle the ``summary`` subcommand.

    Resolves the dump file, selects the requested function block, and prints
    a section header followed by the block's stage summary.
    """
    stage_file = resolve_stage_file(args.path, args.stage, args.base_name)
    target = choose_block(
        load_function_blocks(stage_file),
        args.function,
        exact=args.exact,
    )

    print_section(f"{stage_file.name}: {target.header}")
    print_stage_summary(target)
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with all six subcommands.

    Every subparser stores its handler under ``func``, so the caller can
    dispatch with ``args.func(args)``.  A subcommand is required.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Generate and compare ProDG compiler-state dumps using the exact "
            "ngccc command recovered from ninja."
        )
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    def add_unit(sub: argparse.ArgumentParser) -> None:
        # Shared by the subcommands that start from an objdiff unit.
        sub.add_argument("-u", "--unit", required=True, help="objdiff unit name")

    def add_dump_location(sub: argparse.ArgumentParser, default_stage: str) -> None:
        # Shared by the subcommands that read a single dump file or directory.
        sub.add_argument(
            "path",
            help="dump file path or dump directory produced by this tool",
        )
        sub.add_argument(
            "--stage",
            default=default_stage,
            help=f"dump stage when PATH is a directory (default: {default_stage})",
        )
        sub.add_argument(
            "--base-name",
            help="base filename used to disambiguate PATH when it contains multiple dump sets",
        )

    def add_function_query(sub: argparse.ArgumentParser) -> None:
        # Shared by every subcommand that selects one function block.
        sub.add_argument(
            "-f",
            "--function",
            required=True,
            help="function header query; exact or substring match",
        )
        sub.add_argument(
            "--exact",
            action="store_true",
            help="require an exact function-header match",
        )

    # command
    command = subparsers.add_parser(
        "command",
        help="Show the recovered ngccc command plus derived preprocess/cc1plus invocations",
    )
    add_unit(command)
    command.add_argument(
        "--base-name",
        help="base filename used when showing derived dump outputs (default: source stem)",
    )
    command.add_argument("--ii-path", help="override the displayed preprocessed .ii path")
    command.add_argument("--asm-path", help="override the displayed assembly output path")
    command.add_argument("--dumpbase", help="override the displayed cc1plus -dumpbase path")
    command.set_defaults(func=command_command)

    # dump
    dump = subparsers.add_parser(
        "dump",
        help="Run ngccc -E and cc1plus -da for one unit into a dump directory",
    )
    add_unit(dump)
    dump.add_argument(
        "-o",
        "--out-dir",
        required=True,
        help="directory that will receive the .ii/.s/.rtl/.greg/.lreg dump files",
    )
    dump.add_argument(
        "--base-name",
        help="base filename for generated dumps (default: source stem)",
    )
    dump.add_argument(
        "--force",
        action="store_true",
        help="allow writing into a non-empty output directory",
    )
    dump.add_argument(
        "--print-commands",
        action="store_true",
        help="print the derived preprocess and cc1plus commands before running them",
    )
    dump.set_defaults(func=command_dump)

    # extract
    extract = subparsers.add_parser(
        "extract",
        help="Extract one function block from a dump file or dump directory",
    )
    add_dump_location(extract, "lreg")
    add_function_query(extract)
    extract.add_argument(
        "--grep",
        help="only print lines matching this regex, plus --context lines of surrounding dump text",
    )
    extract.add_argument(
        "-C",
        "--context",
        type=int,
        default=2,
        help="context lines to keep around --grep matches (default: 2)",
    )
    extract.add_argument(
        "--line-numbers",
        action="store_true",
        help="prefix output lines with their original dump file line numbers",
    )
    extract.set_defaults(func=command_extract)

    # trace
    trace = subparsers.add_parser(
        "trace",
        help="Trace selected pseudos or hard registers through one function block",
    )
    add_dump_location(trace, "greg")
    add_function_query(trace)
    trace.add_argument(
        "--pseudo",
        action="append",
        default=[],
        help="pseudo register numbers to trace (comma-separated, repeatable)",
    )
    trace.add_argument(
        "--hard-reg",
        action="append",
        default=[],
        help="hard register numbers to trace (comma-separated, repeatable)",
    )
    trace.add_argument(
        "--limit",
        type=int,
        default=20,
        help="maximum matching entries to print (default: 20, 0 = no limit)",
    )
    trace.add_argument(
        "--line-numbers",
        action="store_true",
        help="prefix traced entry lines with their original dump file line numbers",
    )
    trace.set_defaults(func=command_trace)

    # summary
    summary = subparsers.add_parser(
        "summary",
        help="Summarize one function's pseudos, hard regs, and frame-slot usage in a dump",
    )
    add_dump_location(summary, "rtl")
    add_function_query(summary)
    summary.set_defaults(func=command_summary)

    # diff
    stage_csv = ",".join(DEFAULT_STAGES)
    diff = subparsers.add_parser(
        "diff",
        help="Diff one function across two dump files or dump directories",
    )
    diff.add_argument("left", help="left dump file or dump directory")
    diff.add_argument("right", help="right dump file or dump directory")
    add_function_query(diff)
    diff.add_argument(
        "--stages",
        default=stage_csv,
        help=f"comma-separated dump stages to diff (default: {stage_csv})",
    )
    diff.add_argument(
        "--left-base-name",
        help="base filename used to disambiguate the left dump directory",
    )
    diff.add_argument(
        "--right-base-name",
        help="base filename used to disambiguate the right dump directory",
    )
    diff.add_argument(
        "-C",
        "--context",
        type=int,
        default=3,
        help="unified diff context lines (default: 3)",
    )
    diff.add_argument(
        "--summary",
        action="store_true",
        help="print per-stage pseudo/hard-reg/frame-slot count changes before the diff",
    )
    diff.add_argument(
        "--summary-only",
        action="store_true",
        help="print summary deltas only and skip the full unified diff text",
    )
    diff.add_argument(
        "--skip-missing",
        action="store_true",
        help="skip stages where the requested function is missing instead of failing",
    )
    diff.set_defaults(func=command_diff)

    return parser
def seed_shared_file(shared_path: str, source_path: str, description: str) -> None:
    """Copy ``source_path`` to ``shared_path`` without clobbering other content.

    Args:
        shared_path: Destination inside the shared asset root.
        source_path: Local file to copy from.
        description: Human-readable asset name used in error messages.

    Raises:
        RuntimeError: If ``shared_path`` already exists and is neither a
            regular file with identical contents nor a symlink accepted by
            ``same_symlink``.
    """
    refusal = f"Refusing to replace existing shared {description}: {shared_path}"
    ensure_parent(shared_path)

    if os.path.islink(shared_path):
        # NOTE(review): same_symlink is defined elsewhere in this file;
        # presumably it reports whether the link already targets source_path.
        if not same_symlink(shared_path, source_path):
            raise RuntimeError(refusal)
        # Replace the link with a real copy of the file.
        os.unlink(shared_path)
    elif os.path.isfile(shared_path):
        # A regular file is only acceptable when it is byte-identical.
        if not filecmp.cmp(shared_path, source_path, shallow=False):
            raise RuntimeError(refusal)
        return
    elif lexists(shared_path):
        # Something else (for example a directory) occupies the path.
        raise RuntimeError(refusal)

    shutil.copy2(source_path, shared_path)


def _resolve_zip_member_path(root: str, member_name: str) -> str:
    """Return the absolute target path for a zip member, kept inside ``root``.

    ``root`` must already be an absolute, normalized path.

    Raises:
        RuntimeError: If the member name would resolve outside ``root``
            (for example through ``..`` components).
    """
    candidate = os.path.normpath(os.path.join(root, *member_name.split("/")))
    try:
        inside = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath raises when the paths cannot be compared, e.g. different
        # drives on Windows; treat that as an escape attempt.
        inside = False
    if not inside:
        raise RuntimeError(
            f"Refusing to extract zip member outside {root}: {member_name}"
        )
    return candidate


def extract_zip_into(zip_path: str, output_dir: str) -> None:
    """Extract ``zip_path`` into ``output_dir``, keeping files that already exist.

    Existing files are never overwritten. Every newly written file is marked
    ``0o755``.

    Args:
        zip_path: Archive to read.
        output_dir: Directory that receives the extracted tree; created if
            missing.

    Raises:
        RuntimeError: If a member name would be written outside ``output_dir``.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    os.makedirs(output_dir, exist_ok=True)
    root = os.path.abspath(output_dir)
    with zipfile.ZipFile(zip_path) as archive:
        for member in archive.infolist():
            member_name = member.filename.rstrip("/")
            if not member_name:
                continue

            output_path = _resolve_zip_member_path(root, member_name)
            if member.is_dir():
                os.makedirs(output_path, exist_ok=True)
                continue

            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            if os.path.exists(output_path):
                continue

            try:
                with archive.open(member) as src, open(output_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)
            except BaseException:
                # The file did not exist before this call. Remove the
                # truncated copy so a later run does not skip it as
                # "already extracted".
                if os.path.lexists(output_path):
                    os.remove(output_path)
                raise
            os.chmod(output_path, 0o755)


def seed_bootstrap_assets(
    shared_root: str, xbox_xex: Optional[str], ps2_toolchain_zip: Optional[str]
) -> None:
    """Seed optional user-supplied assets into the shared asset root.

    Args:
        shared_root: Root directory of the shared assets.
        xbox_xex: Optional path to a local Xbox XEX, copied to
            ``orig/EUROPEGERMILESTONE/NfsMWEuropeGerMilestone.xex``.
        ps2_toolchain_zip: Optional path to a PS2 toolchain zip, extracted
            into ``build/compilers``.

    Raises:
        RuntimeError: If an input file is missing, the zip is invalid or
            unsafe, an existing shared file would be replaced, or the zip
            does not provide the expected ``ee-gcc.exe``.
    """
    # Validate every input before touching the shared root, so a bad argument
    # cannot leave it half-seeded.
    if xbox_xex and not os.path.isfile(xbox_xex):
        raise RuntimeError(f"Xbox XEX not found: {xbox_xex}")
    if ps2_toolchain_zip and not os.path.isfile(ps2_toolchain_zip):
        raise RuntimeError(f"PS2 toolchain zip not found: {ps2_toolchain_zip}")

    if xbox_xex:
        seed_shared_file(
            os.path.join(
                shared_root,
                "orig",
                "EUROPEGERMILESTONE",
                "NfsMWEuropeGerMilestone.xex",
            ),
            xbox_xex,
            "Xbox XEX",
        )

    if ps2_toolchain_zip:
        compilers_dir = os.path.join(shared_root, "build", "compilers")
        try:
            extract_zip_into(ps2_toolchain_zip, compilers_dir)
        except zipfile.BadZipFile as error:
            # Convert to RuntimeError, which the rest of this file uses for
            # user-facing failures.
            raise RuntimeError(
                f"PS2 toolchain zip is not a valid zip archive: {ps2_toolchain_zip}"
            ) from error
        expected_ee_gcc = os.path.join(
            compilers_dir,
            "PS2",
            "ee-gcc2.9-991111",
            "bin",
            "ee-gcc.exe",
        )
        if not os.path.isfile(expected_ee_gcc):
            raise RuntimeError(
                "PS2 toolchain zip did not produce build/compilers/PS2/ee-gcc2.9-991111/bin/ee-gcc.exe"
            )
in target_worktrees: bootstrap_generated_files(worktree, version) @@ -418,6 +513,16 @@ def main() -> int: metavar="UNIT", help="Also run `decomp-workflow.py health --smoke-build UNIT` after bootstrap.", ) + parser.add_argument( + "--xbox-xex", + metavar="PATH", + help="Seed the shared Xbox XEX from a local file before linking/bootstrap.", + ) + parser.add_argument( + "--ps2-toolchain-zip", + metavar="PATH", + help="Extract a local PS2 EE-GCC zip into shared build/compilers before linking/bootstrap.", + ) args = parser.parse_args() common_dir = git_common_dir(root_dir) @@ -437,6 +542,8 @@ def main() -> int: args.version, args.health, args.smoke_build, + args.xbox_xex, + args.ps2_toolchain_zip, ) except RuntimeError as e: print(f"Error: {e}", file=sys.stderr)