From 2b78dbf831ed2662c1b8bac008a9b97b3e8a2083 Mon Sep 17 00:00:00 2001 From: Elias Bachaalany Date: Sun, 17 May 2026 07:49:27 -0700 Subject: [PATCH] v0.0.14: write-ready tables, faster navigation, MCP on by default What's new - Read and WRITE through tables. Annotation, name, comment, bookmark, function, and instruction-detail surfaces are now first-class tables. Mutations happen via plain INSERT/UPDATE/DELETE - no one-off scalar helpers. - Faster next-head / previous-head navigation (heads ... ORDER BY address [DESC] LIMIT 1) via ordered range pushdown on the heads table. - Welcome metadata now reports the strings count. - MCP server is enabled by default. Fixes - High-bit (64-bit) addresses route correctly. - MCP server starts only after the engine is fully ready. - Cleaner error messages on UPDATE failures. Migration - The retired scalar helpers each have a direct table-based equivalent. See README and prompts/idasql_agent.md for the full mapping. --- .gitignore | 3 +- CMakeLists.txt | 6 + README.md | 40 +- examples/example_basic.cpp | 4 +- examples/example_decompiler.cpp | 21 +- examples/example_functions.cpp | 9 +- examples/example_grep_search.cpp | 9 - examples/example_instructions.cpp | 16 +- examples/example_strings.cpp | 7 +- ida-plugin.json | 2 +- prompts/idasql_agent.md | 467 ++++++++----- src/cli/CMakeLists.txt | 8 +- src/common/idasql_version.hpp | 4 +- src/common/mcp_server.cpp | 14 +- src/lib/include/idasql/fwd.hpp | 2 +- src/lib/include/idasql/vtable.hpp | 1 + src/lib/src/database.cpp | 2 +- src/lib/src/decompiler.cpp | 75 +- src/lib/src/decompiler.hpp | 14 + src/lib/src/entities.cpp | 872 ++++++++++++++++++++--- src/lib/src/entities.hpp | 89 ++- src/lib/src/functions.cpp | 1084 ++--------------------------- src/lib/src/metadata_welcome.cpp | 3 + src/lib/src/metadata_welcome.hpp | 1 + src/lib/src/search_bytes.cpp | 348 +++++---- src/lib/src/search_bytes.hpp | 10 +- 26 files changed, 1611 insertions(+), 1500 deletions(-) diff --git a/.gitignore b/.gitignore index e2b7f26..127eb33 100644 --- a/.gitignore 
+++ b/.gitignore @@ -9,7 +9,8 @@ build*/ .claude/ # IDE .vs/ -.vscode/ +.vscode/* +!.vscode/settings.json *.suo *.user *.sln.docstates diff --git a/CMakeLists.txt b/CMakeLists.txt index 967c577..7590eeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,6 +106,12 @@ if(IDASQL_WITH_MCP) if(MSVC AND TARGET fastmcpp_core) target_compile_options(fastmcpp_core PRIVATE /MP) endif() + + # fastmcpp_core is a static library that gets linked into the idasql plugin + # (a shared library on Linux). Enable PIC so the link succeeds. + if(TARGET fastmcpp_core) + set_property(TARGET fastmcpp_core PROPERTY POSITION_INDEPENDENT_CODE ON) + endif() endif() # Add subdirectories diff --git a/README.md b/README.md index ece8716..786896b 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ claude /install-plugin https://github.com/allthingsida/idasql-skills $ idasql Error: Database path required (-s) -idasql v0.0.12 - SQL interface to IDA databases +idasql v0.0.14 - SQL interface to IDA databases Usage: idasql -s [-q ] [-f ] [-i] [--export ] @@ -211,14 +211,31 @@ Thank you for using IDA. Have a nice day! #### Prerequisites - CMake 3.20+ -- C++17 compiler +- C++20 compiler - IDA SDK 9.0+ (set `IDASDK` environment variable) ```bash -cmake -S src -B build -DIDASQL_WITH_MCP=OFF +cmake -S . -B build -DIDASQL_WITH_MCP=ON -DIDASQL_BUILD_EXAMPLES=OFF cmake --build build --config Release ``` +Useful CMake switches: + +| Switch | Default | Description | +|--------|---------|-------------| +| `IDASQL_WITH_MCP` | `ON` | Build MCP server support via `fastmcpp`. Disable for a smaller/offline build or when you do not need `--mcp` / `.mcp`. | +| `IDASQL_BUILD_CLI` | `ON` | Build the standalone `idasql` command-line tool. | +| `IDASQL_BUILD_PLUGIN` | `ON` | Build the IDA plugin. | +| `IDASQL_BUILD_EXAMPLES` | `ON` | Build the example programs under `examples/`. | + +Notes: + +- Hex-Rays support is always compiled in and detected at runtime. 
If Hex-Rays is unavailable, decompiler-backed tables/functions are simply not registered. +- HTTP REST support is always compiled in; use `--http` from the CLI or `.http start` from the REPL/plugin CLI. +- IDAPython SQL execution is compiled in but disabled by default at runtime. Enable it per session with `PRAGMA idasql.enable_idapython = 1;`. +- `IDASQL_WITH_MCP=ON` fetches `fastmcpp`; `OFF` removes MCP support and the `--mcp` / `.mcp` commands. +- `XSQL_WITH_THINCLIENT` is forced `ON`, and `HTTPLIB_USE_OPENSSL_IF_AVAILABLE` is forced `OFF` because IDASQL uses local plain HTTP. + ## Available Tables 30+ virtual tables covering functions, strings, types, cross-references, disassembly, decompilation, and more. @@ -236,7 +253,8 @@ cmake --build build --config Release | `blocks` | Basic blocks - start/end address, func_ea, size | | `fchunks` | Function chunks - split/tail chunks with owner | | `instructions` | Disassembly - address, mnemonic, operands, itype, func_addr (DELETE) | -| `heads` | All head items (code + data) - address, size, type, flags, disasm | +| `instruction_operands` | Normalized instruction operands - opnum, text, type, value; optimized by `address` and `func_addr` | +| `heads` | All head items (code + data) - optimized address lookup/range navigation | ### Strings & Bytes @@ -310,14 +328,14 @@ cmake --build build --config Release | Function | Description | |----------|-------------| -| `grep(pattern, limit, offset)` | Unified entity search function (returns JSON) | | `decompile(addr)` | Decompile function at address (returns pseudocode) | | `disasm_at(addr)` | Canonical disassembly listing at address | | `get_ui_context_json()` | Plugin-only UI context JSON (GUI runtime only) | ### Unified Entity Search -Use the `grep` table for composable SQL searches and `grep()` when JSON output is preferred. +Use the `grep` table for composable SQL searches over named functions, labels, +segments, types, and members. 
```sql -- Search anything starting with "Create" @@ -339,8 +357,12 @@ FROM grep WHERE pattern = 'dw%' AND kind = 'member'; --- JSON form with pagination -SELECT grep('Create%', 20, 0); +-- Pagination +SELECT name, kind, full_name +FROM grep +WHERE pattern = 'Create%' +ORDER BY kind, name +LIMIT 20 OFFSET 20; ``` ## Integration @@ -394,7 +416,7 @@ The server uses a random port (8100-8199) to avoid conflicts with `--http`. For MCP-compatible clients (Model Context Protocol, a standard for AI tool integration): -`--mcp` and `.mcp` are available only when built with `-DIDASQL_WITH_MCP=ON` (default is `OFF`). +`--mcp` and `.mcp` are available when built with `-DIDASQL_WITH_MCP=ON`, which is the default. Build with `-DIDASQL_WITH_MCP=OFF` to omit MCP support. ```bash # Standalone mode diff --git a/examples/example_basic.cpp b/examples/example_basic.cpp index 55bc1b4..7626042 100644 --- a/examples/example_basic.cpp +++ b/examples/example_basic.cpp @@ -116,8 +116,8 @@ int main(int argc, char* argv[]) { // Get function at specific index auto first_func = session.query( - "SELECT printf('0x%X', func_at_index(0)) as addr, " - " func_at(func_at_index(0)) as name" + "SELECT printf('0x%X', address) as addr, name " + "FROM funcs WHERE rowid = 0" ); if (!first_func.empty()) { std::cout << "First function: " << first_func.rows[0][1] diff --git a/examples/example_decompiler.cpp b/examples/example_decompiler.cpp index e0a4848..deb6721 100644 --- a/examples/example_decompiler.cpp +++ b/examples/example_decompiler.cpp @@ -48,7 +48,7 @@ int main(int argc, char* argv[]) { std::cout << "=== Decompiler Analysis ===\n\n"; // Try to decompile a function to check if Hex-Rays is available - auto test = session.query("SELECT decompile(func_at_index(0)) as code"); + auto test = session.query("SELECT decompile((SELECT address FROM funcs WHERE rowid = 0)) as code"); if (!test.success || test.empty() || test.rows[0][0].find("Decompiler") != std::string::npos) { std::cerr << "Warning: Hex-Rays 
decompiler may not be available.\n"; std::cerr << "Some queries may fail or return empty results.\n\n"; @@ -62,10 +62,11 @@ int main(int argc, char* argv[]) { auto complex = session.query( "SELECT " - " func_at(func_addr) as name, " + " f.name as name, " " COUNT(*) as lines " - "FROM pseudocode " - "GROUP BY func_addr " + "FROM pseudocode p " + "JOIN funcs f ON p.func_addr = f.address " + "GROUP BY p.func_addr, f.name " "ORDER BY lines DESC " "LIMIT 10" ); @@ -84,12 +85,13 @@ int main(int argc, char* argv[]) { auto most_vars = session.query( "SELECT " - " func_at(func_addr) as name, " + " f.name as name, " " COUNT(*) as total_vars, " " SUM(CASE WHEN is_arg = 1 THEN 1 ELSE 0 END) as args, " " SUM(CASE WHEN is_arg = 0 THEN 1 ELSE 0 END) as locals " - "FROM ctree_lvars " - "GROUP BY func_addr " + "FROM ctree_lvars l " + "JOIN funcs f ON l.func_addr = f.address " + "GROUP BY l.func_addr, f.name " "ORDER BY total_vars DESC " "LIMIT 10" ); @@ -196,8 +198,9 @@ int main(int argc, char* argv[]) { std::cout << "\n=== Lines Containing 'if' Statements ===\n"; auto if_lines = session.query( - "SELECT func_at(func_addr) as func, line " - "FROM pseudocode " + "SELECT f.name as func, p.line " + "FROM pseudocode p " + "JOIN funcs f ON p.func_addr = f.address " "WHERE line LIKE '%if (%' " "LIMIT 10" ); diff --git a/examples/example_functions.cpp b/examples/example_functions.cpp index 2ad3b61..128e50e 100644 --- a/examples/example_functions.cpp +++ b/examples/example_functions.cpp @@ -98,10 +98,11 @@ int main(int argc, char* argv[]) { std::cout << "\n=== Top 10 Functions Making Most Calls ===\n"; auto most_calls = session.query( - "SELECT func_at(func_addr) as name, COUNT(*) as calls " - "FROM instructions " - "WHERE mnemonic = 'call' " - "GROUP BY func_addr " + "SELECT f.name as name, COUNT(*) as calls " + "FROM instructions i " + "JOIN funcs f ON i.func_addr = f.address " + "WHERE i.mnemonic = 'call' " + "GROUP BY i.func_addr, f.name " "ORDER BY calls DESC " "LIMIT 10" ); diff 
--git a/examples/example_grep_search.cpp b/examples/example_grep_search.cpp index 2aad8a5..018babb 100644 --- a/examples/example_grep_search.cpp +++ b/examples/example_grep_search.cpp @@ -9,7 +9,6 @@ * example_grep_search.cpp - Grep-style unified entity search * * Demonstrates: - * - grep() SQL function (JSON output) * - grep virtual table (structured rows) * - Pattern semantics and pagination */ @@ -43,14 +42,6 @@ int main(int argc, char* argv[]) { escaped_pattern.insert(pos, 1, '\''); } - std::cout << "=== grep() JSON Search ===\n\n"; - auto json_result = session.query( - "SELECT grep('" + escaped_pattern + "', 10, 0)" - ); - if (json_result.row_count() > 0) { - std::cout << json_result.scalar() << "\n\n"; - } - std::cout << "=== grep Table Search ===\n\n"; auto rows = session.query( diff --git a/examples/example_instructions.cpp b/examples/example_instructions.cpp index 2dddcfc..1f65c12 100644 --- a/examples/example_instructions.cpp +++ b/examples/example_instructions.cpp @@ -95,10 +95,11 @@ int main(int argc, char* argv[]) { std::cout << "\n=== Functions with Most NOPs ===\n"; auto nops = session.query( - "SELECT func_at(func_addr) as name, COUNT(*) as nop_count " - "FROM instructions " - "WHERE mnemonic = 'nop' " - "GROUP BY func_addr " + "SELECT f.name as name, COUNT(*) as nop_count " + "FROM instructions i " + "JOIN funcs f ON i.func_addr = f.address " + "WHERE i.mnemonic = 'nop' " + "GROUP BY i.func_addr, f.name " "HAVING nop_count > 5 " "ORDER BY nop_count DESC " "LIMIT 10" @@ -135,11 +136,12 @@ int main(int argc, char* argv[]) { // Functions with unusual push/pop ratio auto unusual = session.query( "SELECT " - " func_at(func_addr) as name, " + " f.name as name, " " SUM(CASE WHEN mnemonic = 'push' THEN 1 ELSE 0 END) as pushes, " " SUM(CASE WHEN mnemonic = 'pop' THEN 1 ELSE 0 END) as pops " - "FROM instructions " - "GROUP BY func_addr " + "FROM instructions i " + "JOIN funcs f ON i.func_addr = f.address " + "GROUP BY i.func_addr, f.name " "HAVING pushes > 
20 AND ABS(pushes - pops) > 5 " "ORDER BY pushes DESC " "LIMIT 10" diff --git a/examples/example_strings.cpp b/examples/example_strings.cpp index b6e265f..7ac9bc5 100644 --- a/examples/example_strings.cpp +++ b/examples/example_strings.cpp @@ -142,11 +142,12 @@ int main(int argc, char* argv[]) { std::cout << "\n=== Functions Using Most Strings (Top 10) ===\n"; auto by_func = session.query( - "SELECT func_at(x.from_ea) as func_name, COUNT(DISTINCT s.address) as str_count " + "SELECT f.name as func_name, COUNT(DISTINCT s.address) as str_count " "FROM strings s " "JOIN xrefs x ON s.address = x.to_ea " - "WHERE func_at(x.from_ea) IS NOT NULL " - "GROUP BY func_at(x.from_ea) " + "JOIN funcs f ON x.from_func = f.address " + "WHERE x.from_func != 0 " + "GROUP BY x.from_func, f.name " "ORDER BY str_count DESC " "LIMIT 10" ); diff --git a/ida-plugin.json b/ida-plugin.json index a2e1b6d..8241bc3 100644 --- a/ida-plugin.json +++ b/ida-plugin.json @@ -2,7 +2,7 @@ "IDAMetadataDescriptorVersion": 1, "plugin": { "name": "IDASQL", - "version": "0.0.12", + "version": "0.0.14", "entryPoint": "idasql", "description": "SQL interface for IDA databases. Query functions, xrefs, strings, types, and more using SQL. Supports local CLI, HTTP REST server, and optional MCP integration.", "urls": { diff --git a/prompts/idasql_agent.md b/prompts/idasql_agent.md index fd5b7c9..433a586 100644 --- a/prompts/idasql_agent.md +++ b/prompts/idasql_agent.md @@ -31,11 +31,14 @@ Address-taking SQL functions accept: - numeric strings (`'4198400'`, `'0x401000'`) - symbol names resolved with `get_name_ea(BADADDR, name)` (global names) +Quoted numeric strings are for address-taking scalar functions. For table +predicates, compare address columns to integer EAs such as `address = 0x401000`. 
+ Examples: ```sql SELECT decompile('DriverEntry'); SELECT set_type('DriverEntry', 'NTSTATUS DriverEntry(PDRIVER_OBJECT, PUNICODE_STRING);'); -SELECT comment_at('0x401000'); +SELECT (SELECT comment FROM comments WHERE address = 0x401000 LIMIT 1); ``` If a symbol cannot be resolved, SQL functions return an explicit error like: @@ -488,7 +491,12 @@ SELECT type_name, layout_name, COUNT(*) as count FROM strings GROUP BY type_name, layout_name ORDER BY count DESC; ``` -**Important:** For new analysis (exe/dll), strings are auto-built. For existing databases (i64/idb), strings are already saved. If you see 0 strings unexpectedly, run `SELECT rebuild_strings()` once to rebuild the list. See String List Functions section below. +**Important:** For new analysis (exe/dll), strings are auto-built. For existing databases (i64/idb), strings are already saved. If you see 0 strings unexpectedly, run `SELECT rebuild_strings()` once to rebuild the list. See String List Surfaces section below. + +Current count: +```sql +SELECT COUNT(*) AS strings FROM strings; +``` #### xrefs Cross-references - the most important table for understanding code relationships. @@ -523,7 +531,7 @@ Basic blocks within functions. **Use `func_ea` constraint for performance.** SELECT * FROM blocks WHERE func_ea = 0x401000; -- Functions with most basic blocks -SELECT func_at(func_ea) as name, COUNT(*) as blocks +SELECT (SELECT name FROM funcs WHERE func_ea >= address AND func_ea < end_ea LIMIT 1) as name, COUNT(*) as blocks FROM blocks GROUP BY func_ea ORDER BY blocks DESC LIMIT 10; ``` `WHERE func_ea = X` is the optimized path. Without it, `blocks` may scan all functions. @@ -581,7 +589,7 @@ Use `strings + xrefs + funcs` directly. This is the canonical pattern. 
SELECT s.content as string_value, printf('0x%X', x.from_ea) as ref_addr, - func_at(x.from_ea) as func_name + (SELECT name FROM funcs WHERE x.from_ea >= address AND x.from_ea < end_ea LIMIT 1) as func_name FROM strings s JOIN xrefs x ON x.to_ea = s.address WHERE s.content LIKE '%error%' OR s.content LIKE '%fail%' @@ -589,7 +597,7 @@ ORDER BY func_name, ref_addr; -- Functions with most string references SELECT - func_at(x.from_ea) as func_name, + (SELECT name FROM funcs WHERE x.from_ea >= address AND x.from_ea < end_ea LIMIT 1) as func_name, COUNT(*) as string_refs FROM strings s JOIN xrefs x ON x.to_ea = s.address @@ -655,7 +663,41 @@ SET operand1_format_spec = 'clear' WHERE address = 0x401020; ``` -**Performance:** `WHERE func_addr = X` uses O(function_size) iteration. Without this constraint, it scans the entire database - SLOW. +#### instruction_operands + +`instruction_operands` exposes one row per decoded non-void operand. Use it when you need operand type/value fields, all operands as rows, or a joinable replacement for the retired scalar helpers (`operand`, `operand_type`, `operand_value`, `decode_insn`). + +| Column | Type | Description | +|--------|------|-------------| +| `address` | INT | Instruction address | +| `func_addr` | INT | Containing function | +| `opnum` | INT | Operand index | +| `text` | TEXT | Operand text | +| `type_code` | INT | IDA operand type code | +| `type_name` | TEXT | Operand type name (`reg`, `imm`, `near`, ...) 
| +| `dtype` | INT | Operand dtype | +| `reg` | INT | Register number when applicable | +| `addr` | INT | Referenced address/displacement when applicable | +| `raw_value` | INT | Raw operand value | +| `value` | INT | Best-effort scalar operand value | + +```sql +-- Operand rows for one instruction +SELECT opnum, text, type_name, value +FROM instruction_operands +WHERE address = 0x401000 +ORDER BY opnum; + +-- Instruction details with normalized operands +SELECT i.address, i.itype, i.mnemonic, i.size, o.opnum, o.text, o.type_name, o.value +FROM instructions i +LEFT JOIN instruction_operands o + ON o.address = i.address AND o.func_addr = 0x401000 +WHERE i.func_addr = 0x401000 +ORDER BY i.address, o.opnum; +``` + +**Performance:** `WHERE address = X` decodes one instruction; `WHERE func_addr = X` uses O(function_size) iteration. Without one of these constraints, it scans the entire database - SLOW. #### disasm_calls All call instructions with resolved targets. @@ -669,7 +711,7 @@ All call instructions with resolved targets. ```sql -- Functions that call malloc -SELECT DISTINCT func_at(func_addr) as caller +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as caller FROM disasm_calls WHERE callee_name LIKE '%malloc%'; ``` @@ -730,7 +772,7 @@ INSERT INTO types_enum_values (type_ordinal, value_name, value) VALUES (15, 'FLA INSERT INTO types_enum_values (type_ordinal, value_name, value, comment) VALUES (15, 'FLAG_HIDDEN', 2, 'not visible in UI'); -- Rename a local variable -SELECT rename_lvar(0x401000, 2, 'buffer_size'); +UPDATE ctree_lvars SET name = 'buffer_size' WHERE func_addr = 0x401000 AND idx = 2; -- Change variable type UPDATE ctree_lvars SET type = 'char *' @@ -920,15 +962,15 @@ Return statements with details about what's being returned. 
```sql -- Functions that return 0 -SELECT DISTINCT func_at(func_addr) as name FROM ctree_v_returns +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name FROM ctree_v_returns WHERE return_op = 'cot_num' AND return_num = 0; -- Functions that return -1 (error sentinel) -SELECT DISTINCT func_at(func_addr) as name FROM ctree_v_returns +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name FROM ctree_v_returns WHERE return_op = 'cot_num' AND return_num = -1; -- Functions that return their argument (pass-through) -SELECT DISTINCT func_at(func_addr) as name FROM ctree_v_returns +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name FROM ctree_v_returns WHERE returns_arg = 1; ``` @@ -1046,22 +1088,33 @@ Convenience views for filtering types: ### Extended Tables #### bytes -Byte-wise program view with patch support. +Pure mapped-byte program view with patch support. This table is one row per +mapped byte address; IDA item metadata such as size/type belongs to `heads`. | Column | Type | RW | Description | |--------|------|----|-------------| -| `ea` | INT | R | Address | +| `ea` | INT | R | Byte address | | `value` | INT | RW | Current byte value (UPDATE patches byte) | | `original_value` | INT | R | Original byte value before patch | -| `size` | INT | R | Item size at address | -| `type` | TEXT | R | Item type (`code`, `data`, etc.) 
| | `is_patched` | INT | R | 1 if byte differs from original | +| `fpos` | INT | R | Physical/input file offset (NULL when unavailable) | ```sql -- Read one address SELECT ea, value, original_value, is_patched FROM bytes WHERE ea = 0x401000; +-- Read a byte range, including item-tail bytes +SELECT ea, value +FROM bytes +WHERE ea >= 0x401000 AND ea < 0x401010 +ORDER BY ea; + +-- Get item metadata separately +SELECT address, size, type, flags, disasm +FROM heads +WHERE address = 0x401000; + -- Patch via table update UPDATE bytes SET value = 0x90 WHERE ea = 0x401000; @@ -1147,7 +1200,7 @@ All defined items (code/data heads) in the database. | `size` | INT | Item size | | `flags` | INT | IDA flags | -**Performance:** This table can be very large. Always use address range filters. +**Performance:** `WHERE address = X` and address range filters are optimized. Next/previous navigation should use `ORDER BY address [DESC] LIMIT 1`; broad scans can still be large. #### fixups Relocation and fixup information. 
@@ -1194,7 +1247,7 @@ Function chunks (for functions with non-contiguous code, like exception handlers ```sql -- Functions with multiple chunks (complex control flow) -SELECT func_at(func_addr) as name, COUNT(*) as chunks +SELECT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name, COUNT(*) as chunks FROM fchunks GROUP BY func_addr HAVING chunks > 1; ``` @@ -1271,7 +1324,7 @@ Views for disassembly-level analysis (no Hex-Rays required): SELECT * FROM disasm_v_leaf_funcs LIMIT 10; -- Find hotspot calls (inside loops) -SELECT func_at(func_addr) as func, callee_name +SELECT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as func, callee_name FROM disasm_v_calls_in_loops; ``` @@ -1288,8 +1341,6 @@ FROM disasm_v_calls_in_loops; | `disasm(addr, n)` | Next N instructions from address (count-based, not boundary-aware) | | `disasm_range(start, end)` | All disassembly lines in address range [start, end) | | `disasm_func(addr)` | Full disassembly of function containing address | -| `mnemonic(addr)` | Instruction mnemonic only | -| `operand(addr, n)` | Operand text (n=0-5) | #### Disassembly Examples @@ -1333,7 +1384,7 @@ WHERE func_addr = 0x401000 AND mnemonic = 'call'; ### Byte Access and Patching | Function | Description | |----------|-------------| -| `bytes(addr, n)` | Read `n` bytes as hex string | +| `bytes(addr, n)` | Read `n` raw bytes as hex string | | `bytes_raw(addr, n)` | Read `n` bytes as BLOB | | `patch_byte(addr, val)` | Patch one byte at `addr` (returns 1/0) | | `patch_word(addr, val)` | Patch 2 bytes at `addr` (returns 1/0) | @@ -1360,12 +1411,18 @@ SELECT save_database(); ``` ### Binary Search -| Function | Description | -|----------|-------------| -| `search_bytes(pattern)` | Find all matches, returns JSON array | -| `search_bytes(pattern, start, end)` | Search within address range | -| `search_first(pattern)` | First match address (or NULL) | -| `search_first(pattern, start, end)` | 
First match in range | +Use the `byte_search` table for raw bytes/opcodes. It requires `WHERE pattern = ...`; `matched_hex` is an output column, not the search input. + +| Column | Description | +|--------|-------------| +| `address` | Match address | +| `matched_hex` | Matched bytes rendered as hex text | +| `matched_bytes` | Matched bytes as a BLOB | +| `size` | Match size in bytes | +| `pattern` | Hidden required IDA byte pattern input | +| `start_ea` | Hidden optional inclusive lower bound | +| `end_ea` | Hidden optional exclusive upper bound | +| `max_results` | Hidden optional generator cap | **Pattern syntax (IDA native):** - `"48 8B 05"` - Exact bytes (hex, space-separated) @@ -1377,18 +1434,23 @@ SELECT save_database(); **Example:** ```sql -- Find all matches for a pattern -SELECT search_bytes('48 8B ? 00'); - --- Parse JSON results -SELECT json_extract(value, '$.address') as addr -FROM json_each(search_bytes('48 89 ?')) +SELECT address, matched_hex, size +FROM byte_search +WHERE pattern = '48 8B ? 00' LIMIT 10; -- First match only -SELECT printf('0x%llX', search_first('CC CC CC')); +SELECT printf('0x%llX', address) AS addr +FROM byte_search +WHERE pattern = 'CC CC CC' +ORDER BY address +LIMIT 1; -- Search with alternatives -SELECT search_bytes('E8 (01 02 03 04)'); +SELECT address, matched_hex +FROM byte_search +WHERE pattern = 'E8 (01 02 03 04)' +LIMIT 20; ``` **Optimization Pattern: Find functions using specific instruction** @@ -1396,65 +1458,130 @@ SELECT search_bytes('E8 (01 02 03 04)'); To answer "How many functions use RDTSC instruction?" 
efficiently: ```sql -- Count unique functions containing RDTSC (opcode: 0F 31) -SELECT COUNT(DISTINCT func_start(json_extract(value, '$.address'))) as count -FROM json_each(search_bytes('0F 31')) -WHERE func_start(json_extract(value, '$.address')) IS NOT NULL; +SELECT COUNT(DISTINCT f.address) as count +FROM byte_search b +JOIN funcs f ON b.address >= f.address AND b.address < f.end_ea +WHERE b.pattern = '0F 31'; -- List those functions with names SELECT DISTINCT - func_start(json_extract(value, '$.address')) as func_ea, - name_at(func_start(json_extract(value, '$.address'))) as func_name -FROM json_each(search_bytes('0F 31')) -WHERE func_start(json_extract(value, '$.address')) IS NOT NULL; + f.address as func_ea, + f.name as func_name +FROM byte_search b +JOIN funcs f ON b.address >= f.address AND b.address < f.end_ea +WHERE b.pattern = '0F 31'; ``` This is **much faster** than scanning all disassembly lines because: -- `search_bytes()` uses native binary search -- `func_start()` is O(1) lookup in IDA's function index +- `byte_search` uses IDA's native binary search +- the containment join uses the compact `funcs` table instead of scanning every instruction ### Names & Functions -Address argument note: `addr`/`ea`/`func_addr` parameters accept integer EAs, numeric strings, and symbol names. +Use table lookups for address and containing-function metadata. Resolve symbol names to integer EAs before using these patterns. 
-| Function | Description | -|----------|-------------| -| `name_at(addr)` | Name at address | -| `func_at(addr)` | Function name containing address | -| `func_start(addr)` | Start of containing function | -| `func_end(addr)` | End of containing function | -| `func_qty()` | Total function count | -| `func_at_index(n)` | Function address at index (O(1)) | +| Pattern | Description | +|---------|-------------| +| `SELECT name FROM names WHERE address = :ea LIMIT 1` | Name at address | +| `SELECT name FROM funcs WHERE :ea >= address AND :ea < end_ea LIMIT 1` | Function containing address | +| `SELECT address FROM funcs WHERE :ea >= address AND :ea < end_ea LIMIT 1` | Start of containing function | +| `SELECT end_ea FROM funcs WHERE :ea >= address AND :ea < end_ea LIMIT 1` | End of containing function | + +Function count and index lookup are table-driven: + +```sql +SELECT COUNT(*) AS function_count FROM funcs; +SELECT address FROM funcs WHERE rowid = 0; +``` ### Cross-References -| Function | Description | -|----------|-------------| -| `xrefs_to(addr)` | JSON array of xrefs TO address | -| `xrefs_from(addr)` | JSON array of xrefs FROM address | +Use the `xrefs` table for incoming, outgoing, and function-scoped edge queries: + +```sql +SELECT from_ea, to_ea, type, is_code, from_func +FROM xrefs +WHERE to_ea = 0x401000; + +SELECT from_ea, to_ea, type, is_code, from_func +FROM xrefs +WHERE from_ea = 0x401000; + +SELECT from_ea, to_ea, type, is_code, from_func +FROM xrefs +WHERE from_func = 0x401000; +``` ### Navigation -| Function | Description | -|----------|-------------| -| `next_head(addr)` | Next defined item | -| `prev_head(addr)` | Previous defined item | -| `segment_at(addr)` | Segment name at address | -| `hex(val)` | Format as hex string | +Use `heads` ordering for defined-item navigation, and SQLite formatting functions for display strings. 
Address equality/range filters are optimized; `ORDER BY address` or `ORDER BY address DESC` is consumed for next/previous-item lookups. + +```sql +-- Next defined item +SELECT address +FROM heads +WHERE address > 0x401000 +ORDER BY address +LIMIT 1; + +-- Previous defined item +SELECT address +FROM heads +WHERE address < 0x401000 +ORDER BY address DESC +LIMIT 1; + +-- Nullable scalar shape for callers that need one column and one row +SELECT ( + SELECT address + FROM heads + WHERE address > 0x401000 + ORDER BY address + LIMIT 1 +) AS next_address; + +-- Old IDASQL-style lowercase 0x-prefixed hex formatting +SELECT printf('0x%llx', address) AS address_hex +FROM heads +LIMIT 10; +``` + +Segment lookup is table-driven: + +```sql +SELECT name +FROM segments +WHERE 0x401000 >= start_ea + AND 0x401000 < end_ea +LIMIT 1; +``` ### Comments -| Function | Description | -|----------|-------------| -| `comment_at(addr)` | Get comment at address | -| `set_comment(addr, text)` | Set regular comment | -| `set_comment(addr, text, 1)` | Set repeatable comment | +Read address comments through the `comments` table. 
To preserve the old scalar lookup shape, use a scalar subquery with regular comments first and repeatable comments as fallback: + +```sql +SELECT ( + SELECT COALESCE(NULLIF(comment, ''), NULLIF(rpt_comment, '')) + FROM comments + WHERE address = 0x401000 + LIMIT 1 +) AS comment; +``` + +Write address comments through the table: + +```sql +INSERT INTO comments(address, comment) VALUES (0x401000, 'regular comment'); +INSERT INTO comments(address, rpt_comment) VALUES (0x401000, 'repeatable comment'); +``` ### Modification | Function | Description | |----------|-------------| -| `set_name(addr, name)` | Set name at address | | `type_at(addr)` | Read type declaration applied at address | | `set_type(addr, decl)` | Apply C declaration/type at address (empty decl clears type; `addr` may be EA, numeric string, or symbol name) | | `parse_decls(text)` | Import C declarations (struct/union/enum/typedef) into local types | Preferred SQL write surface for function metadata: - `UPDATE funcs SET name = '...', prototype = '...' WHERE address = ...` +- `INSERT INTO names(address, name) VALUES (..., '...')` or `UPDATE names SET name = '...' WHERE address = ...` - `prototype` maps to `type_at/set_type` behavior and invalidates decompiler cache. 
### Python Execution @@ -1492,36 +1619,50 @@ SELECT get_ui_context_json(); ``` ### Item Analysis -| Function | Description | -|----------|-------------| -| `item_type(addr)` | Item type flags at address | -| `item_size(addr)` | Item size at address | -| `is_code(addr)` | Returns 1 if address is code | -| `is_data(addr)` | Returns 1 if address is data | -| `flags_at(addr)` | Raw IDA flags at address | +Use `heads` for item classification, size, and raw flags: + +```sql +SELECT address, size, type, flags, disasm +FROM heads +WHERE address = 0x401000; + +SELECT address, disasm +FROM heads +WHERE type = 'code' +ORDER BY address +LIMIT 10; +``` ### Instruction Details -| Function | Description | -|----------|-------------| -| `itype(addr)` | Instruction type code (processor-specific) | -| `decode_insn(addr)` | Full instruction info as JSON | -| `operand_type(addr, n)` | Operand type code (o_void, o_reg, etc.) | -| `operand_value(addr, n)` | Operand value (register num, immediate, etc.) | +Use `instructions` and `instruction_operands` for decoded instruction facts: ```sql --- Get instruction type for filtering -SELECT address, itype(address) as itype, mnemonic(address) -FROM heads WHERE is_code(address) = 1 LIMIT 10; +-- Instruction type and mnemonic for filtering +SELECT address, itype, mnemonic +FROM instructions +WHERE func_addr = 0x401000 +LIMIT 10; + +-- Operand type/value details for one instruction +SELECT opnum, text, type_code, type_name, value +FROM instruction_operands +WHERE address = 0x401000 +ORDER BY opnum; --- Decode full instruction -SELECT decode_insn(0x401000); +-- Full decoded instruction row shape +SELECT i.address, i.itype, i.mnemonic, i.size, o.opnum, o.text, o.type_name, o.value +FROM instructions i +LEFT JOIN instruction_operands o + ON o.address = i.address AND o.address = 0x401000 +WHERE i.address = 0x401000 +ORDER BY o.opnum; ``` ### Decompilation **When to use `decompile()` vs `pseudocode` table:** - **Read/show pseudocode** → always start 
with `SELECT decompile(addr)`. It returns the full function as one text block with per-line prefixes (`/* */` when available, `/* */` when no line anchor exists). -- **Local declaration hints** → declaration lines include compact local-variable index hints (`[lv:N]`) so rename operations can target `rename_lvar(func_addr, N, new_name)` safely. +- **Local declaration hints** → declaration lines include compact local-variable index hints (`[lv:N]`) so rename operations can target `UPDATE ctree_lvars ... WHERE func_addr = ... AND idx = N` safely. - **Need fresh output after edits** → use `SELECT decompile(addr, 1)` to force re-decompilation. - **Need structured line access or comment CRUD** → query/update the `pseudocode` table. @@ -1529,10 +1670,6 @@ SELECT decode_insn(0x401000); |----------|-------------| | `decompile(addr)` | **PREFERRED** — Full pseudocode with line prefixes (`addr` may be EA, numeric string, or symbol name; available when decompiler surfaces are enabled) | | `decompile(addr, 1)` | Same output but forces re-decompilation (use after writes/renames) | -| `list_lvars(addr)` | List local variables as JSON | -| `rename_lvar(func_addr, lvar_idx, new_name)` | Rename a local variable by index | -| `rename_lvar_by_name(func_addr, old_name, new_name)` | Rename a local variable by existing name | -| `set_lvar_comment(func_addr, lvar_idx, text)` | Set local-variable comment by index | | `set_union_selection(func_addr, ea, path)` | Set/clear union selection path at EA (`[0,1]` or `0,1`) | | `set_union_selection_item(func_addr, item_id, path)` | Set/clear union selection path by `ctree.item_id` | | `set_union_selection_ea_arg(func_addr, ea, arg_idx, path[, callee])` | **PREFERRED** call-arg targeting helper; resolves to item id or errors with hint | @@ -1569,9 +1706,9 @@ SELECT decompile(0x401000); 2. 
Baseline mutation surfaces (must exist in all supported plugin runtimes): ```sql -SELECT set_name(0x401000, 'my_func'); -SELECT rename_lvar(0x401000, 0, 'arg0'); -SELECT set_lvar_comment(0x401000, 0, 'seed comment'); +INSERT INTO names(address, name) VALUES (0x401000, 'my_func'); +UPDATE ctree_lvars SET name = 'arg0' WHERE func_addr = 0x401000 AND idx = 0; +UPDATE ctree_lvars SET comment = 'seed comment' WHERE func_addr = 0x401000 AND idx = 0; ``` 3. Advanced expression/representation helpers (optional in older/minimal runtimes): @@ -1620,7 +1757,7 @@ If `set_union_selection*` / `set_numform*` / `ctree_item_at` are unavailable: - Use `UPDATE funcs SET prototype = ...` for function-level typing. - Use `UPDATE ctree_lvars SET type/comment = ...` for local shaping. -- Prefer `rename_lvar*` for local names, even in fallback flows. +- Use `UPDATE ctree_lvars SET name = ...` after selecting a deterministic `idx`. - Use `UPDATE pseudocode SET comment = ...` for stable semantic breadcrumbs. - Keep constants readable via comments when enum rendering primitives are unavailable. - Explicitly note unavailable primitives in your response so follow-up runs don't waste queries. 
@@ -1633,29 +1770,35 @@ SELECT decompile(0x401000); SELECT decompile(0x401000, 1); -- Get all local variables in a function -SELECT list_lvars(0x401000); +SELECT idx, name, type, comment, size, is_arg, is_result, stkoff, mreg FROM ctree_lvars WHERE func_addr = 0x401000 ORDER BY idx; -- Rename by index (canonical, deterministic) -SELECT rename_lvar(0x401000, 2, 'buffer_size'); +UPDATE ctree_lvars SET name = 'buffer_size' WHERE func_addr = 0x401000 AND idx = 2; --- Rename by current name (convenience; fails if ambiguous) -SELECT rename_lvar_by_name(0x401000, 'v2', 'buffer_size'); +-- Rename by current name: inspect/select one idx first, then update by idx +UPDATE ctree_lvars SET name = 'buffer_size' +WHERE func_addr = 0x401000 + AND idx = ( + SELECT idx FROM ctree_lvars + WHERE func_addr = 0x401000 AND name = 'v2' + ORDER BY idx LIMIT 1 + ); -- If you discovered the target via stack slot or another query, resolve idx first -SELECT rename_lvar( - 0x401000, - (SELECT idx - FROM ctree_lvars - WHERE func_addr = 0x401000 AND stkoff = 32 - ORDER BY idx - LIMIT 1), - 'ctx'); +UPDATE ctree_lvars SET name = 'ctx' +WHERE func_addr = 0x401000 + AND idx = ( + SELECT idx + FROM ctree_lvars + WHERE func_addr = 0x401000 AND stkoff = 32 + ORDER BY idx + LIMIT 1 + ); -- Set local-variable comment by index -SELECT set_lvar_comment(0x401000, 2, 'points to decrypted buffer'); +UPDATE ctree_lvars SET comment = 'points to decrypted buffer' WHERE func_addr = 0x401000 AND idx = 2; -- Simple current-row UPDATE path for rename --- Prefer rename_lvar* for split/array locals or scripted cleanup UPDATE ctree_lvars SET name = 'buffer_size' WHERE func_addr = 0x401000 AND idx = 2; @@ -1665,10 +1808,10 @@ WHERE func_addr = 0x401000 AND idx = 2; -- Fallback when direct UPDATE comment write fails on a specific lvar -- (some runtimes can return "SQL logic error" for particular slots): -SELECT set_lvar_comment(0x401000, 2, 'points to decrypted buffer'); +UPDATE ctree_lvars SET comment = 'points to 
decrypted buffer' WHERE func_addr = 0x401000 AND idx = 2; -- Mandatory verification loop after rename -SELECT list_lvars(0x401000); +SELECT idx, name, type, comment, size, is_arg, is_result, stkoff, mreg FROM ctree_lvars WHERE func_addr = 0x401000 ORDER BY idx; SELECT decompile(0x401000, 1); -- Import declarations + apply prototype to improve decompilation quality @@ -1740,26 +1883,19 @@ SELECT ctree_item_at(0x140001BD0, 0x140001C49, 'cot_asg', 0); SELECT set_union_selection_ea_expr(0x140001BD0, 0x140001C49, '', 'cot_asg', 0); ``` -`rename_lvar*` functions return JSON with explicit fields: -- `success` (execution success) -- `applied` (observable rename applied) -- `reason` (for non-applied cases: `not_found`, `ambiguous_name`, `unchanged`, `not_nameable`, ...) +Decompiler local and label mutation is table-driven: +- List locals with `SELECT ... FROM ctree_lvars WHERE func_addr = ... ORDER BY idx`. +- Rename/comment locals with `UPDATE ctree_lvars` using `func_addr + idx`. +- Rename labels with `UPDATE ctree_labels` using `func_addr + label_num`. 
### File Generation | Function | Description | |----------|-------------| -| `gen_asm_file(start, end, path)` | Generate ASM file | -| `gen_lst_file(start, end, path)` | Generate listing file | -| `gen_map_file(path)` | Generate MAP file | -| `gen_idc_file(start, end, path)` | Generate IDC script | -| `gen_html_file(start, end, path)` | Generate HTML file | +| `gen_listing(path)` | Generate full-database listing output (LST) to `path` | ```sql --- Export function as ASM -SELECT gen_asm_file(0x401000, 0x401100, '/tmp/func.asm'); - --- Generate MAP file -SELECT gen_map_file('/tmp/binary.map'); +-- Whole database listing export +SELECT gen_listing('C:/tmp/full.lst'); ``` ### Graph Generation @@ -1781,7 +1917,6 @@ SELECT gen_schema_dot(); | Surface | Description | |---------|-------------| | `grep` table | Structured rows for composable SQL search | -| `grep(pattern, limit, offset)` | JSON array for quick agent/tool output | Searches functions, labels, segments, structs, unions, enums, members, and enum members. Pattern rules: @@ -1802,27 +1937,24 @@ FROM grep WHERE pattern = 'main' LIMIT 20; --- JSON form with pagination -SELECT grep('sub%', 10, 0); -SELECT grep('sub%', 10, 10); - --- Parse JSON result from grep() -SELECT json_extract(value, '$.name') as name, - printf('0x%llX', json_extract(value, '$.address')) as addr -FROM json_each(grep('init', 50, 0)) -WHERE json_extract(value, '$.kind') = 'function'; +-- Pagination is ordinary SQL +SELECT name, kind, address +FROM grep +WHERE pattern = 'init' AND kind = 'function' +ORDER BY kind, name +LIMIT 50 OFFSET 0; ``` -### String List Functions +### String List Surfaces -IDA maintains a cached list of strings. Use `rebuild_strings()` to detect and cache strings. +IDA maintains a cached list of strings. Use `rebuild_strings()` to detect and cache strings, `SELECT COUNT(*) FROM strings` for the current count, and the `strings` table for row-level analysis. 
-| Function | Description | -|----------|-------------| +| Surface | Description | +|---------|-------------| | `rebuild_strings()` | Rebuild with ASCII + UTF-16, minlen 5 (default) | | `rebuild_strings(minlen)` | Rebuild with custom minimum length | | `rebuild_strings(minlen, types)` | Rebuild with custom length and type mask | -| `string_count()` | Get current string count (no rebuild) | +| `SELECT COUNT(*) FROM strings` | Current string-list count (optimized without row materialization) | **Type mask values:** - `1` = ASCII only (STRTYPE_C) @@ -1833,7 +1965,7 @@ IDA maintains a cached list of strings. Use `rebuild_strings()` to detect and ca ```sql -- Check current string count -SELECT string_count(); +SELECT COUNT(*) AS strings FROM strings; -- Rebuild with defaults (ASCII + UTF-16, minlen 5) SELECT rebuild_strings(); @@ -1896,8 +2028,6 @@ WHERE g.pattern = 'sub%' AND g.kind = 'function'; | `parent_name` | TEXT | Parent type (for members) | | `full_name` | TEXT | Fully qualified name | -For JSON output instead of rows, use `grep(pattern, limit, offset)`. 
- --- ## Performance Rules @@ -1933,7 +2063,9 @@ WHERE itype IN (16, 18) -- x86 call opcodes SELECT address FROM funcs ORDER BY RANDOM() LIMIT 1; -- FAST: O(1) - direct index access -SELECT func_at_index(ABS(RANDOM()) % func_qty()); +SELECT address +FROM funcs +WHERE rowid = ABS(RANDOM()) % (SELECT COUNT(*) FROM funcs); ``` --- @@ -1955,7 +2087,7 @@ LIMIT 10; ### Find Functions Calling a Specific API ```sql -SELECT DISTINCT func_at(from_ea) as caller +SELECT DISTINCT (SELECT name FROM funcs WHERE from_ea >= address AND from_ea < end_ea LIMIT 1) as caller FROM xrefs WHERE to_ea = (SELECT address FROM imports WHERE name = 'CreateFileW'); ``` @@ -1963,7 +2095,7 @@ WHERE to_ea = (SELECT address FROM imports WHERE name = 'CreateFileW'); ### String Cross-Reference Analysis ```sql -SELECT s.content, func_at(x.from_ea) as used_by +SELECT s.content, (SELECT name FROM funcs WHERE x.from_ea >= address AND x.from_ea < end_ea LIMIT 1) as used_by FROM strings s JOIN xrefs x ON s.address = x.to_ea WHERE s.content LIKE '%password%'; @@ -1972,7 +2104,7 @@ WHERE s.content LIKE '%password%'; ### Function Complexity (by Block Count) ```sql -SELECT func_at(func_ea) as name, COUNT(*) as block_count +SELECT (SELECT name FROM funcs WHERE func_ea >= address AND func_ea < end_ea LIMIT 1) as name, COUNT(*) as block_count FROM blocks GROUP BY func_ea ORDER BY block_count DESC @@ -2016,7 +2148,7 @@ ORDER BY f.name; ### Find Zero Comparisons (Potential Error Checks) ```sql -SELECT func_at(func_addr) as func, printf('0x%X', ea) as addr +SELECT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as func, printf('0x%X', ea) as addr FROM ctree_v_comparisons WHERE op_name = 'cot_eq' AND rhs_op = 'cot_num' AND rhs_num = 0; ``` @@ -2033,7 +2165,7 @@ ORDER BY f.name; ### malloc with Constant Size ```sql -SELECT func_at(c.func_addr) as func, a.arg_num_value as size +SELECT (SELECT name FROM funcs WHERE c.func_addr >= address AND c.func_addr < end_ea LIMIT 1) as func, 
a.arg_num_value as size FROM ctree_v_calls c JOIN ctree_call_args a ON a.func_addr = c.func_addr AND a.call_item_id = c.item_id WHERE c.callee_name LIKE '%malloc%' @@ -2156,15 +2288,15 @@ HAVING COUNT(*) > 3 ORDER BY return_count DESC; -- Functions that return 0 (common success pattern) -SELECT DISTINCT func_at(func_addr) as name FROM ctree_v_returns +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name FROM ctree_v_returns WHERE return_op = 'cot_num' AND return_num = 0; -- Functions that return -1 (error sentinel) -SELECT DISTINCT func_at(func_addr) as name FROM ctree_v_returns +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name FROM ctree_v_returns WHERE return_op = 'cot_num' AND return_num = -1; -- Functions that return a specific constant -SELECT DISTINCT func_at(func_addr) as name FROM ctree_v_returns +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as name FROM ctree_v_returns WHERE return_op = 'cot_num' AND return_num = 1; ``` @@ -2346,7 +2478,7 @@ WITH RECURSIVE callers AS ( JOIN disasm_calls dc ON dc.callee_addr = c.func_addr WHERE c.depth < 5 ) -SELECT func_at(func_addr) as caller, MIN(depth) as distance +SELECT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as caller, MIN(depth) as distance FROM callers GROUP BY func_addr ORDER BY distance, caller; @@ -2357,11 +2489,14 @@ ORDER BY distance, caller; ```sql -- Rank functions by size within each segment SELECT - segment_at(f.address) as seg, + s.name as seg, f.name, f.size, - ROW_NUMBER() OVER (PARTITION BY segment_at(f.address) ORDER BY f.size DESC) as rank + ROW_NUMBER() OVER (PARTITION BY s.start_ea ORDER BY f.size DESC) as rank FROM funcs f +JOIN segments s + ON f.address >= s.start_ea + AND f.address < s.end_ea WHERE f.size > 0; ``` @@ -2395,12 +2530,15 @@ WHERE size > 100; SELECT f.name, f.size, - 
segment_at(f.address) as segment, + s.name as segment, (SELECT COUNT(*) FROM blocks WHERE func_ea = f.address) as block_count, (SELECT COUNT(*) FROM disasm_calls WHERE func_addr = f.address) as outgoing_calls, (SELECT COUNT(*) FROM xrefs WHERE to_ea = f.address AND is_code = 1) as incoming_calls, (SELECT COUNT(*) FROM ctree_lvars WHERE func_addr = f.address) as local_vars FROM funcs f +JOIN segments s + ON f.address >= s.start_ea + AND f.address < s.end_ea ORDER BY f.size DESC LIMIT 20; ``` @@ -2479,25 +2617,25 @@ WHERE length > 5; ```sql -- Comprehensive security audit in one query -SELECT 'dangerous_func' as check_type, func_at(func_addr) as location, callee_name as detail +SELECT 'dangerous_func' as check_type, (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) as location, callee_name as detail FROM disasm_calls WHERE callee_name IN ('strcpy', 'strcat', 'sprintf', 'gets', 'scanf') UNION ALL -SELECT 'crypto_usage', func_at(func_addr), callee_name +SELECT 'crypto_usage', (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1), callee_name FROM disasm_calls WHERE callee_name LIKE '%Crypt%' OR callee_name LIKE '%AES%' OR callee_name LIKE '%RSA%' UNION ALL -SELECT 'network_call', func_at(func_addr), callee_name +SELECT 'network_call', (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1), callee_name FROM disasm_calls WHERE callee_name IN ('socket', 'connect', 'send', 'recv', 'WSAStartup') UNION ALL -SELECT 'registry_access', func_at(func_addr), callee_name +SELECT 'registry_access', (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1), callee_name FROM disasm_calls WHERE callee_name LIKE 'Reg%' @@ -2612,7 +2750,7 @@ Common Hex-Rays AST node types: - **No Hex-Rays license:** Decompiler tables (`pseudocode`, `ctree*`, `ctree_lvars`) will be empty or unavailable - **No constraint on decompiler tables:** Query will be extremely slow (decompiles all 
functions) -- **Invalid address:** Functions like `func_at(addr)` return NULL +- **Invalid address:** Containing-function table lookups return no row; use a scalar subquery when you need a nullable scalar result - **Missing function:** JOINs may return fewer rows than expected --- @@ -2636,7 +2774,7 @@ SELECT content FROM strings WHERE length > 10 ORDER BY length DESC LIMIT 20; ```sql -- Dangerous string functions -SELECT DISTINCT func_at(func_addr) FROM disasm_calls +SELECT DISTINCT (SELECT name FROM funcs WHERE func_addr >= address AND func_addr < end_ea LIMIT 1) FROM disasm_calls WHERE callee_name IN ('strcpy', 'strcat', 'sprintf', 'gets'); -- Crypto-related @@ -2665,13 +2803,13 @@ SELECT name, type, size FROM ctree_lvars WHERE func_addr = 0x401000; SELECT callee_name FROM disasm_calls WHERE func_addr = 0x401000; -- What calls it -SELECT func_at(from_ea) FROM xrefs WHERE to_ea = 0x401000 AND is_code = 1; +SELECT (SELECT name FROM funcs WHERE from_ea >= address AND from_ea < end_ea LIMIT 1) FROM xrefs WHERE to_ea = 0x401000 AND is_code = 1; ``` ### "Find all uses of a string" ```sql -SELECT s.content, func_at(x.from_ea) as function, printf('0x%X', x.from_ea) as location +SELECT s.content, (SELECT name FROM funcs WHERE x.from_ea >= address AND x.from_ea < end_ea LIMIT 1) as function, printf('0x%X', x.from_ea) as location FROM strings s JOIN xrefs x ON s.address = x.to_ea WHERE s.content LIKE '%config%'; @@ -2849,7 +2987,6 @@ WHERE calling_conv = 'fastcall' AND return_is_ptr = 1; | Modify database | `funcs`, `names`, `comments`, `bookmarks` (INSERT/UPDATE/DELETE) | | Store custom key-value data | `netnode_kv` (full CRUD, persists in IDB) | | Entity search (structured) | `grep WHERE pattern = '...'` | -| Entity search (JSON) | `grep('pattern', limit, offset)` | **Remember:** Always use `func_addr = X` constraints on instruction and decompiler tables for acceptable performance. 
diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt index 83a6386..b6e7126 100644 --- a/src/cli/CMakeLists.txt +++ b/src/cli/CMakeLists.txt @@ -10,7 +10,7 @@ ida_add_idalib(idasql_cli target_link_libraries(idasql_cli PRIVATE xsql::xsql) target_link_libraries(idasql_cli PRIVATE idasql) target_include_directories(idasql_cli PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../lib/include) -target_compile_definitions(idasql_cli PRIVATE USE_IDA_SDK) +target_compile_definitions(idasql_cli PRIVATE USE_IDA_SDK USE_HEXRAYS) # Output name: idasql (not idasql_cli), placed next to ida.exe for idalib set_target_properties(idasql_cli PROPERTIES @@ -48,12 +48,6 @@ elseif(UNIX) ) endif() -# Hex-Rays decompiler support -option(USE_HEXRAYS "Enable Hex-Rays decompiler support" ON) -if(USE_HEXRAYS) - target_compile_definitions(idasql_cli PRIVATE USE_HEXRAYS) -endif() - # MCP support (from parent project) if(IDASQL_WITH_MCP) target_sources(idasql_cli PRIVATE diff --git a/src/common/idasql_version.hpp b/src/common/idasql_version.hpp index 0eb4511..b52dd6e 100644 --- a/src/common/idasql_version.hpp +++ b/src/common/idasql_version.hpp @@ -13,5 +13,5 @@ #define IDASQL_VERSION_MAJOR 0 #define IDASQL_VERSION_MINOR 0 -#define IDASQL_VERSION_PATCH 13 -#define IDASQL_VERSION_STRING "0.0.13" +#define IDASQL_VERSION_PATCH 14 +#define IDASQL_VERSION_STRING "0.0.14" diff --git a/src/common/mcp_server.cpp b/src/common/mcp_server.cpp index cd9658a..11f4e33 100644 --- a/src/common/mcp_server.cpp +++ b/src/common/mcp_server.cpp @@ -25,6 +25,10 @@ namespace idasql { using Json = nlohmann::json; +static bool starts_with_text(const std::string& value, const char* prefix) { + return value.rfind(prefix, 0) == 0; +} + class IDAMCPServer::Impl { public: fastmcpp::tools::ToolManager tool_manager; @@ -107,7 +111,7 @@ MCPQueueResult IDAMCPServer::queue_and_wait(MCPPendingCommand::Type type, const } // Convention: query callbacks return "Error: ..." 
on failure - bool ok = !cmd->result.starts_with("Error: "); + bool ok = !starts_with_text(cmd->result, "Error: "); return {ok, cmd->result}; } @@ -177,7 +181,7 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, } result = query_cb_(query); // Convention: query callbacks return "Error: ..." on failure - if (result.starts_with("Error: ")) { + if (starts_with_text(result, "Error: ")) { success = false; } } @@ -212,13 +216,17 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, "/messages" ); + // Mark the IDASQL-side server state before exposing the SSE endpoint. The + // transport can accept a client immediately after start(), before this + // method returns to the CLI loop that drains queued commands. + running_.store(true); if (!impl_->server->start()) { + running_.store(false); impl_.reset(); return -1; } port_ = impl_->server->port(); - running_.store(true); return port_; } diff --git a/src/lib/include/idasql/fwd.hpp b/src/lib/include/idasql/fwd.hpp index 88ea6f7..82e078d 100644 --- a/src/lib/include/idasql/fwd.hpp +++ b/src/lib/include/idasql/fwd.hpp @@ -52,7 +52,7 @@ namespace functions { } namespace search { - bool register_search_bytes(xsql::Database& db); + bool register_byte_search(xsql::Database& db); } } // namespace idasql diff --git a/src/lib/include/idasql/vtable.hpp b/src/lib/include/idasql/vtable.hpp index e368679..c1ed1fa 100644 --- a/src/lib/include/idasql/vtable.hpp +++ b/src/lib/include/idasql/vtable.hpp @@ -24,6 +24,7 @@ * * auto xrefs_table = idasql::cached_table("xrefs") * .estimate_rows([]() { return get_func_qty() * 10; }) + * .count([]() { return get_xref_qty(); }) // Optional COUNT(*) fast path * .cache_builder([](auto& cache) { ... populate ... 
}) * .column_int64("from_ea", [](const XrefInfo& r) { return r.from_ea; }) * .build(); diff --git a/src/lib/src/database.cpp b/src/lib/src/database.cpp index 7a38e1b..b5344da 100644 --- a/src/lib/src/database.cpp +++ b/src/lib/src/database.cpp @@ -408,7 +408,7 @@ void QueryEngine::init() { decompiler_->register_all(db_); functions::register_sql_functions(db_); - search::register_search_bytes(db_); + search::register_byte_search(db_); } // ============================================================================ diff --git a/src/lib/src/decompiler.cpp b/src/lib/src/decompiler.cpp index f42fef2..3e62892 100644 --- a/src/lib/src/decompiler.cpp +++ b/src/lib/src/decompiler.cpp @@ -1353,6 +1353,52 @@ void LvarsInFuncIterator::column(xsql::FunctionContext& ctx, int col) { int64_t LvarsInFuncIterator::rowid() const { return static_cast(idx_); } +// --- CtreeLabelsInFuncIterator --- + +CtreeLabelsInFuncIterator::CtreeLabelsInFuncIterator(ea_t func_addr) { + collect_ctree_labels(labels_, func_addr); +} + +bool CtreeLabelsInFuncIterator::next() { + if (!started_) { + started_ = true; + if (labels_.empty()) return false; + idx_ = 0; + return true; + } + if (idx_ + 1 < labels_.size()) { ++idx_; return true; } + idx_ = labels_.size(); + return false; +} + +bool CtreeLabelsInFuncIterator::eof() const { + return started_ && idx_ >= labels_.size(); +} + +void CtreeLabelsInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= labels_.size()) { ctx.result_null(); return; } + const auto& label = labels_[idx_]; + switch (col) { + case 0: ctx.result_int64(label.func_addr); break; + case 1: ctx.result_int(label.label_num); break; + case 2: ctx.result_text(label.name.c_str()); break; + case 3: ctx.result_int(label.item_id); break; + case 4: + if (label.item_ea != BADADDR) { + ctx.result_int64(label.item_ea); + } else { + ctx.result_int64(0); + } + break; + case 5: ctx.result_int(label.is_user_defined ? 
1 : 0); break; + default: ctx.result_null(); break; + } +} + +int64_t CtreeLabelsInFuncIterator::rowid() const { + return static_cast<int64_t>(idx_); +} + // --- CtreeInFuncIterator --- CtreeInFuncIterator::CtreeInFuncIterator(ea_t func_addr) { @@ -2021,7 +2067,7 @@ LabelRenameResult rename_label_ex(ea_t func_addr, int label_num, const char* new user_labels_second(it) = new_name; } - save_user_labels(func_addr, labels, &*cfunc); + save_user_labels(func_addr, labels); user_labels_free(labels); invalidate_decompiler_cache(func_addr); @@ -2448,6 +2494,22 @@ CachedTableDef<CtreeLabelInfo> define_ctree_labels() { row = rows[idx]; return true; }) + .row_populator([](CtreeLabelInfo& row, int argc, xsql::FunctionArg* argv) { + // argv[2]=func_addr, argv[3]=label_num, argv[4]=name, argv[5]=item_id, + // argv[6]=item_ea, argv[7]=is_user_defined + if (argc > 2) row.func_addr = static_cast<ea_t>(argv[2].as_int64()); + if (argc > 3) row.label_num = argv[3].as_int(); + if (argc > 4 && !argv[4].is_null()) { + const char* v = argv[4].as_c_str(); + row.name = v ? v : ""; + } + if (argc > 5) row.item_id = argv[5].as_int(); + if (argc > 6) { + ea_t ea = static_cast<ea_t>(argv[6].as_int64()); + row.item_ea = ea != 0 ? ea : BADADDR; + } + if (argc > 7) row.is_user_defined = argv[7].as_int() != 0; + }) .column_int64("func_addr", [](const CtreeLabelInfo& row) -> int64_t { return row.func_addr; }) .column_int("label_num", [](const CtreeLabelInfo& row) -> int { return row.label_num; }) .column_text_rw("name", @@ -2456,11 +2518,11 @@ CachedTableDef<CtreeLabelInfo> define_ctree_labels() { }, [](CtreeLabelInfo& row, const char* new_name) -> bool { const std::string requested = new_name ? 
new_name : ""; - if (requested == row.name) return true; - bool ok = rename_label(row.func_addr, row.label_num, new_name); - if (!ok) { + LabelRenameResult r = rename_label_ex(row.func_addr, row.label_num, new_name); + if (!r.success || (!r.applied && r.reason != "unchanged")) { xsql::set_vtab_error( - "ctree_labels rename failed (func=" + + "ctree_labels rename failed: " + r.reason + + " (func=" + format_ea_hex(row.func_addr) + " label=" + std::to_string(row.label_num) + ")"); return false; @@ -2478,6 +2540,9 @@ CachedTableDef define_ctree_labels() { .column_int("is_user_defined", [](const CtreeLabelInfo& row) -> int { return row.is_user_defined ? 1 : 0; }) + .filter_eq("func_addr", [](int64_t func_addr) -> std::unique_ptr { + return std::make_unique(static_cast(func_addr)); + }, 10.0, 8.0) .build(); } diff --git a/src/lib/src/decompiler.hpp b/src/lib/src/decompiler.hpp index e77b048..34d6159 100644 --- a/src/lib/src/decompiler.hpp +++ b/src/lib/src/decompiler.hpp @@ -410,6 +410,20 @@ class LvarsInFuncIterator : public xsql::RowIterator { int64_t rowid() const override; }; +// Ctree label iterator for single function +class CtreeLabelsInFuncIterator : public xsql::RowIterator { + std::vector labels_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit CtreeLabelsInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + // Ctree iterator for single function class CtreeInFuncIterator : public xsql::RowIterator { std::vector items_; diff --git a/src/lib/src/entities.cpp b/src/lib/src/entities.cpp index 0e03bb3..b696ace 100644 --- a/src/lib/src/entities.cpp +++ b/src/lib/src/entities.cpp @@ -166,6 +166,7 @@ CachedTableDef define_funcs() { return cached_table("funcs") .no_shared_cache() .estimate_rows([]() -> size_t { return get_func_qty(); }) + .count([]() -> size_t { return get_func_qty(); }) .cache_builder([](std::vector 
&rows) { rows.clear(); const size_t n = get_func_qty(); @@ -1360,6 +1361,7 @@ CachedTableDef define_strings() { return cached_table("strings") .no_shared_cache() .estimate_rows([]() -> size_t { return get_strlist_qty(); }) + .count([]() -> size_t { return get_strlist_qty(); }) .cache_builder([](std::vector &cache) { size_t n = get_strlist_qty(); for (size_t i = 0; i < n; i++) { @@ -1552,13 +1554,167 @@ const char *get_item_type_str(ea_t ea) { return "other"; } -CachedTableDef define_heads() { - return cached_table("heads") - .no_shared_cache() +namespace { + +enum class HeadOrder { Asc, Desc }; + +struct HeadBounds { + bool has_lower = false; + ea_t lower = 0; + bool lower_inclusive = true; + bool has_upper = false; + ea_t upper = 0; + bool upper_inclusive = true; +}; + +bool is_defined_head(ea_t ea) { + return ea != BADADDR && is_head(get_flags(ea)); +} + +ea_t normalize_sql_ea(int64_t value) { + return static_cast(static_cast(value)); +} + +void tighten_lower_bound(HeadBounds &bounds, ea_t ea, bool inclusive) { + if (!bounds.has_lower || ea > bounds.lower || + (ea == bounds.lower && !inclusive && bounds.lower_inclusive)) { + bounds.has_lower = true; + bounds.lower = ea; + bounds.lower_inclusive = inclusive; + } +} + +void tighten_upper_bound(HeadBounds &bounds, ea_t ea, bool inclusive) { + if (!bounds.has_upper || ea < bounds.upper || + (ea == bounds.upper && !inclusive && bounds.upper_inclusive)) { + bounds.has_upper = true; + bounds.upper = ea; + bounds.upper_inclusive = inclusive; + } +} + +bool head_within_bounds(ea_t ea, const HeadBounds &bounds) { + if (ea == BADADDR) + return false; + if (bounds.has_lower && + (ea < bounds.lower || + (ea == bounds.lower && !bounds.lower_inclusive))) { + return false; + } + if (bounds.has_upper && + (ea > bounds.upper || + (ea == bounds.upper && !bounds.upper_inclusive))) { + return false; + } + return true; +} + +class HeadsGenerator : public xsql::Generator { + HeadOrder order_; + HeadBounds bounds_; + bool started_ = 
false; + ea_t current_ea_ = BADADDR; + mutable HeadRow row_{BADADDR}; + + ea_t first_ascending() const { + const ea_t max_ea = inf_get_max_ea(); + ea_t start = bounds_.has_lower ? bounds_.lower : inf_get_min_ea(); + if (start == BADADDR || start >= max_ea) + return BADADDR; + if (bounds_.has_lower && !bounds_.lower_inclusive) + return next_head(start, max_ea); + if (is_defined_head(start)) + return start; + return next_head(start, max_ea); + } + + ea_t first_descending() const { + const ea_t min_ea = inf_get_min_ea(); + ea_t start = bounds_.has_upper ? bounds_.upper : inf_get_max_ea(); + if (start == BADADDR) + return BADADDR; + if (bounds_.has_upper && bounds_.upper_inclusive && + is_defined_head(start)) { + return start; + } + if (start <= min_ea) + return BADADDR; + return prev_head(start, min_ea); + } + +public: + HeadsGenerator(HeadOrder order, HeadBounds bounds) + : order_(order), bounds_(bounds) {} + + bool next() override { + ea_t next_ea = BADADDR; + if (!started_) { + started_ = true; + next_ea = + order_ == HeadOrder::Asc ? 
first_ascending() : first_descending(); + } else if (order_ == HeadOrder::Asc) { + next_ea = next_head(current_ea_, inf_get_max_ea()); + } else { + next_ea = prev_head(current_ea_, inf_get_min_ea()); + } + + if (!head_within_bounds(next_ea, bounds_)) { + current_ea_ = BADADDR; + return false; + } + + current_ea_ = next_ea; + row_.ea = current_ea_; + return true; + } + + const HeadRow &current() const override { return row_; } + + int64_t rowid() const override { return static_cast<int64_t>(current_ea_); } +}; + +void apply_head_constraint(HeadBounds &bounds, + const xsql::GeneratorConstraintArg &arg) { + const ea_t ea = normalize_sql_ea(arg.value.as_int64()); + switch (arg.op) { + case xsql::ConstraintOp::Eq: + tighten_lower_bound(bounds, ea, true); + tighten_upper_bound(bounds, ea, true); + break; + case xsql::ConstraintOp::Gt: + tighten_lower_bound(bounds, ea, false); + break; + case xsql::ConstraintOp::Ge: + tighten_lower_bound(bounds, ea, true); + break; + case xsql::ConstraintOp::Lt: + tighten_upper_bound(bounds, ea, false); + break; + case xsql::ConstraintOp::Le: + tighten_upper_bound(bounds, ea, true); + break; + } +} + +std::unique_ptr<xsql::Generator<HeadRow>> +make_heads_generator(HeadOrder order, + const std::vector<xsql::GeneratorConstraintArg> &args) { + HeadBounds bounds; + for (const auto &arg : args) { + apply_head_constraint(bounds, arg); + } + return std::make_unique<HeadsGenerator>(order, bounds); +} + +} // namespace + +GeneratorTableDef<HeadRow> define_heads() { + return generator_table<HeadRow>("heads") .estimate_rows( []() -> size_t { return static_cast<size_t>(get_nlist_size()); }) - .cache_builder( - [](std::vector<HeadRow> &rows) { collect_head_rows(rows); }) + .generator([]() -> std::unique_ptr<xsql::Generator<HeadRow>> { + return std::make_unique<HeadsGenerator>(HeadOrder::Asc, HeadBounds{}); + }) .column_int64("address", [](const HeadRow &row) -> int64_t { return static_cast<int64_t>(row.ea); @@ -1582,83 +1738,285 @@ CachedTableDef<HeadRow> define_heads() { tag_remove(&line); return line.c_str(); }) + .constraint_filter( + {xsql::required_eq("address", "")}, + [](const std::vector<xsql::GeneratorConstraintArg> &args) + -> std::unique_ptr<xsql::Generator<HeadRow>> { + 
return make_heads_generator(HeadOrder::Asc, args); + }, + 1.0, 1.0) + .constraint_filter( + {xsql::optional_ge("address"), xsql::optional_gt("address"), + xsql::optional_lt("address"), xsql::optional_le("address")}, + [](const std::vector &args) + -> std::unique_ptr> { + return make_heads_generator(HeadOrder::Asc, args); + }, + 10.0, 100.0) + .order_by_consumed("address") + .constraint_filter( + {xsql::optional_ge("address"), xsql::optional_gt("address"), + xsql::optional_lt("address"), xsql::optional_le("address")}, + [](const std::vector &args) + -> std::unique_ptr> { + return make_heads_generator(HeadOrder::Desc, args); + }, + 10.0, 100.0) + .order_by_consumed("address", true) .build(); } // ============================================================================ -// BYTES Table - Read/write byte values with patch support +// BYTES Table - Raw mapped bytes with patch support // ============================================================================ -BytesAtIterator::BytesAtIterator(ea_t ea) : ea_(ea) {} +namespace { + +enum class ByteOrder { Asc, Desc }; -bool BytesAtIterator::next() { - if (yielded_) { - // Second call - exhausted - exhausted_ = true; +struct ByteBounds { + bool has_lower = false; + ea_t lower = 0; + bool lower_inclusive = true; + bool has_upper = false; + ea_t upper = 0; + bool upper_inclusive = true; +}; + +void tighten_byte_lower_bound(ByteBounds &bounds, ea_t ea, bool inclusive) { + if (!bounds.has_lower || ea > bounds.lower || + (ea == bounds.lower && !inclusive && bounds.lower_inclusive)) { + bounds.has_lower = true; + bounds.lower = ea; + bounds.lower_inclusive = inclusive; + } +} + +void tighten_byte_upper_bound(ByteBounds &bounds, ea_t ea, bool inclusive) { + if (!bounds.has_upper || ea < bounds.upper || + (ea == bounds.upper && !inclusive && bounds.upper_inclusive)) { + bounds.has_upper = true; + bounds.upper = ea; + bounds.upper_inclusive = inclusive; + } +} + +bool byte_beyond_upper(ea_t ea, const ByteBounds &bounds) { + 
return bounds.has_upper && + (ea > bounds.upper || (ea == bounds.upper && !bounds.upper_inclusive)); +} + +bool byte_below_lower(ea_t ea, const ByteBounds &bounds) { + return bounds.has_lower && + (ea < bounds.lower || (ea == bounds.lower && !bounds.lower_inclusive)); +} + +bool byte_within_bounds(ea_t ea, const ByteBounds &bounds) { + if (ea == BADADDR) return false; + return !byte_below_lower(ea, bounds) && !byte_beyond_upper(ea, bounds); +} + +bool is_mapped_byte_address(ea_t ea) { + return ea != BADADDR && is_mapped(ea); +} + +size_t estimate_mapped_byte_rows() { + uint64 total = 0; + for (int i = 0; i < get_segm_qty(); ++i) { + segment_t *seg = getnseg(i); + if (!seg || seg->end_ea <= seg->start_ea) + continue; + total += static_cast(seg->end_ea - seg->start_ea); + if (total > static_cast(std::numeric_limits::max())) + return std::numeric_limits::max(); } - // First call - yield the single row - yielded_ = true; - return true; + return static_cast(total); } -bool BytesAtIterator::eof() const { return exhausted_; } +class BytesGenerator : public xsql::Generator { + ByteOrder order_; + ByteBounds bounds_; + bool started_ = false; + ea_t current_ea_ = BADADDR; + mutable ByteRow row_{BADADDR}; + + ea_t first_ascending() const { + ea_t start = bounds_.has_lower ? 
bounds_.lower : inf_get_min_ea(); + if (start == BADADDR) + return BADADDR; + if (bounds_.has_lower && !bounds_.lower_inclusive) { + if (start == BADADDR - 1) + return BADADDR; + ++start; + } + return next_mapped_at_or_after(start); + } + + ea_t first_descending() const { + if (get_segm_qty() <= 0) + return BADADDR; -void BytesAtIterator::column(xsql::FunctionContext &ctx, int col) { - switch (col) { - case 0: // ea - ctx.result_int64(ea_); - break; - case 1: // value - ctx.result_int(get_byte(ea_)); - break; - case 2: // original_value - ctx.result_int(static_cast(get_original_byte(ea_))); + ea_t start = BADADDR; + if (bounds_.has_upper) { + start = bounds_.upper; + if (!bounds_.upper_inclusive) { + if (start == 0) + return BADADDR; + --start; + } + } else { + segment_t *seg = get_last_seg(); + if (!seg || seg->end_ea <= seg->start_ea) + return BADADDR; + start = seg->end_ea - 1; + } + return prev_mapped_at_or_before(start); + } + + ea_t next_mapped_at_or_after(ea_t start) const { + segment_t *seg = getseg(start); + if (!seg) + seg = get_next_seg(start); + + while (seg) { + ea_t ea = start > seg->start_ea ? start : seg->start_ea; + while (ea < seg->end_ea) { + if (byte_beyond_upper(ea, bounds_)) + return BADADDR; + if (byte_within_bounds(ea, bounds_) && is_mapped_byte_address(ea)) + return ea; + if (ea == BADADDR - 1) + return BADADDR; + ++ea; + } + + seg = get_next_seg(seg->start_ea); + if (seg) + start = seg->start_ea; + } + return BADADDR; + } + + ea_t prev_mapped_at_or_before(ea_t start) const { + segment_t *seg = getseg(start); + if (!seg) + seg = get_prev_seg(start); + + while (seg) { + if (seg->end_ea <= seg->start_ea) { + seg = get_prev_seg(seg->start_ea); + continue; + } + + ea_t ea = start < seg->end_ea ? 
start : seg->end_ea - 1; + while (true) { + if (byte_below_lower(ea, bounds_)) + return BADADDR; + if (byte_within_bounds(ea, bounds_) && is_mapped_byte_address(ea)) + return ea; + if (ea == seg->start_ea || ea == 0) + break; + --ea; + } + + if (seg->start_ea == 0) + return BADADDR; + seg = get_prev_seg(seg->start_ea); + if (seg && seg->end_ea > seg->start_ea) + start = seg->end_ea - 1; + } + return BADADDR; + } + +public: + BytesGenerator(ByteOrder order, ByteBounds bounds) + : order_(order), bounds_(bounds) {} + + bool next() override { + ea_t next_ea = BADADDR; + if (!started_) { + started_ = true; + next_ea = + order_ == ByteOrder::Asc ? first_ascending() : first_descending(); + } else if (order_ == ByteOrder::Asc) { + if (current_ea_ == BADADDR - 1) + return false; + next_ea = next_mapped_at_or_after(current_ea_ + 1); + } else { + if (current_ea_ == 0) + return false; + next_ea = prev_mapped_at_or_before(current_ea_ - 1); + } + + if (!byte_within_bounds(next_ea, bounds_)) { + current_ea_ = BADADDR; + return false; + } + + current_ea_ = next_ea; + row_.ea = current_ea_; + return true; + } + + const ByteRow ¤t() const override { return row_; } + + int64_t rowid() const override { return static_cast(current_ea_); } +}; + +void apply_byte_constraint(ByteBounds &bounds, + const xsql::GeneratorConstraintArg &arg) { + const ea_t ea = normalize_sql_ea(arg.value.as_int64()); + switch (arg.op) { + case xsql::ConstraintOp::Eq: + tighten_byte_lower_bound(bounds, ea, true); + tighten_byte_upper_bound(bounds, ea, true); break; - case 3: // size - ctx.result_int(static_cast(get_item_size(ea_))); + case xsql::ConstraintOp::Gt: + tighten_byte_lower_bound(bounds, ea, false); break; - case 4: // type - ctx.result_text(get_item_type_str(ea_)); + case xsql::ConstraintOp::Ge: + tighten_byte_lower_bound(bounds, ea, true); break; - case 5: { // is_patched - int patched = - (get_byte(ea_) != static_cast(get_original_byte(ea_))) ? 
1 : 0; - ctx.result_int(patched); + case xsql::ConstraintOp::Lt: + tighten_byte_upper_bound(bounds, ea, false); break; - } - case 6: { // fpos - const qoff64_t fpos = get_fileregion_offset(ea_); - if (fpos < 0) - ctx.result_null(); - else - ctx.result_int64(static_cast(fpos)); + case xsql::ConstraintOp::Le: + tighten_byte_upper_bound(bounds, ea, true); break; } +} + +std::unique_ptr> +make_bytes_generator(ByteOrder order, + const std::vector &args) { + ByteBounds bounds; + for (const auto &arg : args) { + apply_byte_constraint(bounds, arg); } + return std::make_unique(order, bounds); } -int64_t BytesAtIterator::rowid() const { return static_cast(ea_); } +} // namespace -CachedTableDef define_bytes() { - return cached_table("bytes") - .no_shared_cache() - .estimate_rows( - []() -> size_t { return static_cast(get_nlist_size()); }) - .cache_builder( - [](std::vector &rows) { collect_head_rows(rows); }) - .row_populator([](HeadRow &row, int argc, xsql::FunctionArg *argv) { - // argv[2] = ea, argv[3] = value, ... 
- if (argc > 2) - row.ea = static_cast(argv[2].as_int64()); +GeneratorTableDef define_bytes() { + return generator_table("bytes") + .estimate_rows([]() -> size_t { return estimate_mapped_byte_rows(); }) + .generator([]() -> std::unique_ptr> { + return std::make_unique(ByteOrder::Asc, ByteBounds{}); }) .column_int64("ea", - [](const HeadRow &row) -> int64_t { + [](const ByteRow &row) -> int64_t { return static_cast(row.ea); }) .column_int_rw( - "value", [](const HeadRow &row) -> int { return get_byte(row.ea); }, - [](HeadRow &row, int val) -> bool { + "value", [](const ByteRow &row) -> int { return get_byte(row.ea); }, + [](ByteRow &row, int val) -> bool { + if (!is_mapped_byte_address(row.ea)) { + xsql::set_vtab_error("bytes: address is not mapped: " + + idasql::format_ea_hex(row.ea)); + return false; + } bool ok = patch_byte(row.ea, static_cast(val)); if (!ok) xsql::set_vtab_error("bytes: failed to patch byte at " + @@ -1666,38 +2024,56 @@ CachedTableDef define_bytes() { return ok; }) .column_int("original_value", - [](const HeadRow &row) -> int { + [](const ByteRow &row) -> int { return static_cast(get_original_byte(row.ea)); }) - .column_int("size", - [](const HeadRow &row) -> int { - return static_cast(get_item_size(row.ea)); - }) - .column_text("type", - [](const HeadRow &row) -> std::string { - return get_item_type_str(row.ea); - }) .column_int("is_patched", - [](const HeadRow &row) -> int { + [](const ByteRow &row) -> int { return (get_byte(row.ea) != static_cast(get_original_byte(row.ea))) ? 
1 : 0; }) .column("fpos", xsql::ColumnType::Integer, - [](xsql::FunctionContext &ctx, const HeadRow &row) { + [](xsql::FunctionContext &ctx, const ByteRow &row) { const qoff64_t fpos = get_fileregion_offset(row.ea); if (fpos < 0) ctx.result_null(); else ctx.result_int64(static_cast(fpos)); - }) - .filter_eq( - "ea", - [](int64_t ea_val) -> std::unique_ptr { - return std::make_unique(static_cast(ea_val)); + }) + .row_lookup([](ByteRow &row, int64_t ea_val) -> bool { + const ea_t ea = normalize_sql_ea(ea_val); + if (!is_mapped_byte_address(ea)) + return false; + row.ea = ea; + return true; + }) + .constraint_filter( + {xsql::required_eq("ea", "")}, + [](const std::vector &args) + -> std::unique_ptr> { + return make_bytes_generator(ByteOrder::Asc, args); + }, + 1.0, 1.0) + .constraint_filter( + {xsql::optional_ge("ea"), xsql::optional_gt("ea"), + xsql::optional_lt("ea"), xsql::optional_le("ea")}, + [](const std::vector &args) + -> std::unique_ptr> { + return make_bytes_generator(ByteOrder::Asc, args); + }, + 10.0, 100.0) + .order_by_consumed("ea") + .constraint_filter( + {xsql::optional_ge("ea"), xsql::optional_gt("ea"), + xsql::optional_lt("ea"), xsql::optional_le("ea")}, + [](const std::vector &args) + -> std::unique_ptr> { + return make_bytes_generator(ByteOrder::Desc, args); }, - 1.0) + 10.0, 100.0) + .order_by_consumed("ea", true) .build(); } @@ -2183,10 +2559,10 @@ bool apply_operand_representation(ea_t ea, int opnum, return ok; } -const char *operand_class_name(optype_t type) { +const char *operand_type_name(optype_t type) { switch (type) { case o_void: - return ""; + return "void"; case o_reg: return "reg"; case o_mem: @@ -2208,6 +2584,29 @@ const char *operand_class_name(optype_t type) { case o_idpspec4: case o_idpspec5: return "idpspec"; + default: + return "idpspec"; + } +} + +const char *operand_class_name(optype_t type) { + switch (type) { + case o_void: + return ""; + case o_reg: + case o_mem: + case o_phrase: + case o_displ: + case o_imm: + case o_far: 
+ case o_near: + case o_idpspec0: + case o_idpspec1: + case o_idpspec2: + case o_idpspec3: + case o_idpspec4: + case o_idpspec5: + return operand_type_name(type); default: return "unknown"; } @@ -2753,6 +3152,302 @@ CachedTableDef define_instructions() { return builder.build(); } +// ============================================================================ +// INSTRUCTION_OPERANDS Table - One decoded operand per row +// ============================================================================ + +static int64_t instruction_operand_rowid(ea_t ea, int opnum) { + return static_cast(ea) * kInstructionOperandCount + opnum; +} + +static int64_t operand_value_for_row(const op_t &op) { + switch (op.type) { + case o_imm: + return static_cast(op.value); + case o_mem: + case o_near: + case o_far: + case o_displ: + return static_cast(op.addr); + case o_reg: + return static_cast(op.reg); + default: + return static_cast(op.value); + } +} + +void instruction_operand_column_common(xsql::FunctionContext &ctx, ea_t ea, + int opnum, int col) { + insn_t insn; + op_t op; + if (!decode_operand(ea, opnum, insn, op, nullptr)) { + ctx.result_null(); + return; + } + + switch (col) { + case 0: + ctx.result_int64(ea); + break; + case 1: { + func_t *f = get_func(ea); + ctx.result_int64(f ? 
f->start_ea : 0); + break; + } + case 2: + ctx.result_int(opnum); + break; + case 3: { + qstring text; + print_operand(&text, ea, opnum); + tag_remove(&text); + ctx.result_text(text.c_str()); + break; + } + case 4: + ctx.result_int(static_cast(op.type)); + break; + case 5: + ctx.result_text_static(operand_type_name(op.type)); + break; + case 6: + ctx.result_int(static_cast(op.dtype)); + break; + case 7: + ctx.result_int(op.reg); + break; + case 8: + ctx.result_int64(static_cast(op.addr)); + break; + case 9: + ctx.result_int64(static_cast(op.value)); + break; + case 10: + ctx.result_int64(operand_value_for_row(op)); + break; + default: + ctx.result_null(); + break; + } +} + +void collect_instruction_operand_rows(std::vector &rows) { + rows.clear(); + + ea_t ea = inf_get_min_ea(); + ea_t max_ea = inf_get_max_ea(); + while (ea < max_ea && ea != BADADDR) { + if (is_code(get_flags(ea))) { + insn_t insn; + if (decode_insn(&insn, ea) > 0) { + for (int opnum = 0; opnum < UA_MAXOP; ++opnum) { + if (insn.ops[opnum].type == o_void) + break; + rows.push_back({ea, opnum}); + } + } + } + ea = next_head(ea, max_ea); + } +} + +InstructionOperandsAtAddressIterator::InstructionOperandsAtAddressIterator( + ea_t ea) + : ea_(ea) {} + +bool InstructionOperandsAtAddressIterator::advance_to_next_operand() { + while (++opnum_ < UA_MAXOP) { + if (insn_.ops[opnum_].type != o_void) + return true; + } + return false; +} + +bool InstructionOperandsAtAddressIterator::next() { + if (!started_) { + started_ = true; + decoded_ = (ea_ != BADADDR) && is_code(get_flags(ea_)) && + decode_insn(&insn_, ea_) > 0; + valid_ = decoded_ && advance_to_next_operand(); + return valid_; + } + + valid_ = decoded_ && advance_to_next_operand(); + return valid_; +} + +bool InstructionOperandsAtAddressIterator::eof() const { + return started_ && !valid_; +} + +void InstructionOperandsAtAddressIterator::column(xsql::FunctionContext &ctx, + int col) { + instruction_operand_column_common(ctx, ea_, opnum_, col); +} + 
+int64_t InstructionOperandsAtAddressIterator::rowid() const { + return instruction_operand_rowid(ea_, opnum_); +} + +InstructionOperandsInFuncIterator::InstructionOperandsInFuncIterator( + ea_t func_addr) + : func_addr_(func_addr) { + pfn_ = get_func(func_addr_); +} + +bool InstructionOperandsInFuncIterator::load_current_instruction() { + if (!fii_valid_) + return false; + current_ea_ = fii_.current(); + decoded_ = decode_insn(&insn_, current_ea_) > 0; + opnum_ = -1; + return decoded_; +} + +bool InstructionOperandsInFuncIterator::advance_to_next_operand() { + while (++opnum_ < UA_MAXOP) { + if (insn_.ops[opnum_].type != o_void) + return true; + } + return false; +} + +bool InstructionOperandsInFuncIterator::advance_to_next_instruction_with_operand() { + while (fii_valid_) { + if (load_current_instruction() && advance_to_next_operand()) + return true; + fii_valid_ = fii_.next_code(); + } + return false; +} + +bool InstructionOperandsInFuncIterator::next() { + if (!pfn_) + return false; + + if (!started_) { + started_ = true; + fii_valid_ = fii_.set(pfn_); + valid_ = advance_to_next_instruction_with_operand(); + return valid_; + } + + if (valid_ && advance_to_next_operand()) + return true; + + if (fii_valid_) + fii_valid_ = fii_.next_code(); + valid_ = advance_to_next_instruction_with_operand(); + return valid_; +} + +bool InstructionOperandsInFuncIterator::eof() const { + return started_ && !valid_; +} + +void InstructionOperandsInFuncIterator::column(xsql::FunctionContext &ctx, + int col) { + instruction_operand_column_common(ctx, current_ea_, opnum_, col); +} + +int64_t InstructionOperandsInFuncIterator::rowid() const { + return instruction_operand_rowid(current_ea_, opnum_); +} + +CachedTableDef define_instruction_operands() { + return cached_table("instruction_operands") + .no_shared_cache() + .estimate_rows([]() -> size_t { + return static_cast(get_nlist_size()) * 2; + }) + .cache_builder([](std::vector &rows) { + collect_instruction_operand_rows(rows); + }) 
+ .column_int64("address", + [](const InstructionOperandRow &row) -> int64_t { + return static_cast(row.ea); + }) + .column_int64("func_addr", [](const InstructionOperandRow &row) -> int64_t { + func_t *f = get_func(row.ea); + return f ? f->start_ea : 0; + }) + .column_int("opnum", [](const InstructionOperandRow &row) -> int { + return row.opnum; + }) + .column_text("text", [](const InstructionOperandRow &row) -> std::string { + qstring text; + print_operand(&text, row.ea, row.opnum); + tag_remove(&text); + return text.c_str(); + }) + .column_int("type_code", [](const InstructionOperandRow &row) -> int { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return 0; + return static_cast(op.type); + }) + .column_text("type_name", + [](const InstructionOperandRow &row) -> std::string { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return ""; + return operand_type_name(op.type); + }) + .column_int("dtype", [](const InstructionOperandRow &row) -> int { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return 0; + return static_cast(op.dtype); + }) + .column_int("reg", [](const InstructionOperandRow &row) -> int { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return 0; + return op.reg; + }) + .column_int64("addr", [](const InstructionOperandRow &row) -> int64_t { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return 0; + return static_cast(op.addr); + }) + .column_int64("raw_value", + [](const InstructionOperandRow &row) -> int64_t { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return 0; + return static_cast(op.value); + }) + .column_int64("value", [](const InstructionOperandRow &row) -> int64_t { + insn_t insn; + op_t op; + if (!decode_operand(row.ea, row.opnum, insn, op, nullptr)) + return 0; + return 
operand_value_for_row(op); + }) + .filter_eq( + "address", + [](int64_t address) -> std::unique_ptr { + return std::make_unique( + static_cast(address)); + }, + 1.0, 4.0) + .filter_eq( + "func_addr", + [](int64_t func_addr) -> std::unique_ptr { + return std::make_unique( + static_cast(func_addr)); + }, + 200.0) + .build(); +} + // ============================================================================ // USERDATA Table - netnode-backed key-value store // ============================================================================ @@ -2989,7 +3684,8 @@ TableRegistry::TableRegistry() entries(define_entries()), comments(define_comments()), bookmarks(define_bookmarks()), heads(define_heads()), bytes(define_bytes()), patched_bytes(define_patched_bytes()), - instructions(define_instructions()), xrefs(define_xrefs()), + instructions(define_instructions()), + instruction_operands(define_instruction_operands()), xrefs(define_xrefs()), data_refs(define_data_refs()), blocks(define_blocks()), function_chunks(define_function_chunks()), imports(define_imports()), strings(define_strings()), netnode_kv(define_netnode_kv()) { @@ -3020,10 +3716,11 @@ void TableRegistry::register_all(xsql::Database &db) { // Cached tables (query-scoped cache) register_cached_table(db, "comments", &comments); register_cached_table(db, "bookmarks", &bookmarks); - register_cached_table(db, "heads", &heads); - register_cached_table(db, "bytes", &bytes); + register_generator_table(db, "heads", &heads); + register_generator_table(db, "bytes", &bytes); register_cached_table(db, "patched_bytes", &patched_bytes); register_cached_table(db, "instructions", &instructions); + register_cached_table(db, "instruction_operands", &instruction_operands); register_cached_table(db, "xrefs", &xrefs); register_cached_table(db, "data_refs", &data_refs); register_cached_table(db, "blocks", &blocks); @@ -3041,29 +3738,34 @@ void TableRegistry::register_all(xsql::Database &db) { void 
TableRegistry::create_helper_views(xsql::Database &db) { // callers view - who calls a function - // Uses pre-computed from_func and name_at() scalar to avoid expensive range - // JOINs + // Uses pre-computed from_func to avoid expensive range joins. db.exec(R"( CREATE VIEW IF NOT EXISTS callers AS SELECT x.to_ea as func_addr, x.from_ea as caller_addr, - COALESCE(name_at(x.from_func), printf('sub_%X', x.from_func)) as caller_name, + COALESCE(f.name, n.name, printf('sub_%X', x.from_func)) as caller_name, x.from_func as caller_func_addr FROM xrefs x + LEFT JOIN funcs f ON f.address = x.from_func + LEFT JOIN names n ON n.address = x.from_func WHERE x.is_code = 1 AND x.from_func != 0 )"); // callees view - what does a function call - // Uses from_func for grouping and name_at() for name resolution + // Uses from_func for grouping and table joins for name resolution. db.exec(R"( CREATE VIEW IF NOT EXISTS callees AS SELECT x.from_func as func_addr, - COALESCE(name_at(x.from_func), printf('sub_%X', x.from_func)) as func_name, + COALESCE(f.name, fn.name, printf('sub_%X', x.from_func)) as func_name, x.to_ea as callee_addr, - COALESCE(name_at(x.to_ea), printf('sub_%X', x.to_ea)) as callee_name + COALESCE(cn.name, cf.name, printf('sub_%X', x.to_ea)) as callee_name FROM xrefs x + LEFT JOIN funcs f ON f.address = x.from_func + LEFT JOIN names fn ON fn.address = x.from_func + LEFT JOIN names cn ON cn.address = x.to_ea + LEFT JOIN funcs cf ON cf.address = x.to_ea WHERE x.is_code = 1 AND x.from_func != 0 )"); @@ -3076,9 +3778,11 @@ void TableRegistry::create_helper_views(xsql::Database &db) { s.length as string_length, x.from_ea as ref_addr, x.from_func as func_addr, - COALESCE(name_at(x.from_func), printf('sub_%X', x.from_func)) as func_name + COALESCE(f.name, n.name, printf('sub_%X', x.from_func)) as func_name FROM strings s JOIN xrefs x ON x.to_ea = s.address + LEFT JOIN funcs f ON f.address = x.from_func + LEFT JOIN names n ON n.address = x.from_func WHERE x.from_func != 0 
)"); } diff --git a/src/lib/src/entities.hpp b/src/lib/src/entities.hpp index 4bae0d9..81e4ba2 100644 --- a/src/lib/src/entities.hpp +++ b/src/lib/src/entities.hpp @@ -108,6 +108,10 @@ struct HeadRow { ea_t ea = BADADDR; }; +struct ByteRow { + ea_t ea = BADADDR; +}; + struct PatchedByteInfo { ea_t ea; qoff64_t fpos; @@ -119,6 +123,11 @@ struct InstructionRow { ea_t ea = BADADDR; }; +struct InstructionOperandRow { + ea_t ea = BADADDR; + int opnum = 0; +}; + struct ImportEnumContext { std::vector *cache; int module_idx; @@ -181,8 +190,10 @@ void collect_head_rows(std::vector &rows); const char *get_item_type_str(ea_t ea); void collect_instruction_rows(std::vector &rows); +void collect_instruction_operand_rows(std::vector &rows); // Operand helpers +const char *operand_type_name(optype_t type); std::string operand_kind_text(ea_t ea, int opnum); std::string operand_type_text(ea_t ea, int opnum); int operand_enum_serial(ea_t ea, int opnum); @@ -197,6 +208,8 @@ std::string operand_format_spec_text(ea_t ea, int opnum); void instruction_column_common(xsql::FunctionContext &ctx, ea_t ea, ea_t func_addr, int col); +void instruction_operand_column_common(xsql::FunctionContext &ctx, ea_t ea, + int opnum, int col); // Constants inline constexpr int kInstructionOperandCount = 8; @@ -259,10 +272,11 @@ VTableDef define_names(); VTableDef define_entries(); CachedTableDef define_comments(); CachedTableDef define_bookmarks(); -CachedTableDef define_heads(); -CachedTableDef define_bytes(); +GeneratorTableDef define_heads(); +GeneratorTableDef define_bytes(); CachedTableDef define_patched_bytes(); CachedTableDef define_instructions(); +CachedTableDef define_instruction_operands(); CachedTableDef define_xrefs(); CachedTableDef define_data_refs(); CachedTableDef define_blocks(); @@ -358,20 +372,6 @@ class BlocksInFuncIterator : public xsql::RowIterator { int64_t rowid() const override; }; -// Iterator for single-address point query (constraint pushdown on ea) -class BytesAtIterator : 
public xsql::RowIterator { - ea_t ea_; - bool yielded_ = false; - bool exhausted_ = false; - -public: - explicit BytesAtIterator(ea_t ea); - bool next() override; - bool eof() const override; - void column(xsql::FunctionContext &ctx, int col) override; - int64_t rowid() const override; -}; - // Iterator for instructions within a single function (constraint pushdown) class InstructionsInFuncIterator : public xsql::RowIterator { ea_t func_addr_; @@ -403,6 +403,50 @@ class InstructionAtAddressIterator : public xsql::RowIterator { int64_t rowid() const override; }; +// Iterator for operands at a single instruction address. +class InstructionOperandsAtAddressIterator : public xsql::RowIterator { + ea_t ea_; + insn_t insn_; + int opnum_ = -1; + bool started_ = false; + bool decoded_ = false; + bool valid_ = false; + + bool advance_to_next_operand(); + +public: + explicit InstructionOperandsAtAddressIterator(ea_t ea); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext &ctx, int col) override; + int64_t rowid() const override; +}; + +// Iterator for operands in instructions within a single function. 
+class InstructionOperandsInFuncIterator : public xsql::RowIterator { + ea_t func_addr_; + func_t *pfn_ = nullptr; + func_item_iterator_t fii_; + insn_t insn_; + ea_t current_ea_ = BADADDR; + int opnum_ = -1; + bool started_ = false; + bool fii_valid_ = false; + bool decoded_ = false; + bool valid_ = false; + + bool load_current_instruction(); + bool advance_to_next_operand(); + bool advance_to_next_instruction_with_operand(); + +public: + explicit InstructionOperandsInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext &ctx, int col) override; + int64_t rowid() const override; +}; + // ============================================================================ // TableRegistry // ============================================================================ @@ -415,10 +459,11 @@ struct TableRegistry { VTableDef entries; CachedTableDef comments; CachedTableDef bookmarks; - CachedTableDef heads; - CachedTableDef bytes; + GeneratorTableDef heads; + GeneratorTableDef bytes; CachedTableDef patched_bytes; CachedTableDef instructions; + CachedTableDef instruction_operands; // Cached tables (query-scoped cache - memory freed after query) CachedTableDef xrefs; @@ -456,6 +501,14 @@ struct TableRegistry { db.register_cached_table(module_name.c_str(), def); db.create_table(name, module_name.c_str()); } + + template + void register_generator_table(xsql::Database &db, const char *name, + const GeneratorTableDef *def) { + std::string module_name = std::string("ida_") + name; + db.register_generator_table(module_name.c_str(), def); + db.create_table(name, module_name.c_str()); + } }; } // namespace entities diff --git a/src/lib/src/functions.cpp b/src/lib/src/functions.cpp index f586691..6e0aa23 100644 --- a/src/lib/src/functions.cpp +++ b/src/lib/src/functions.cpp @@ -525,143 +525,6 @@ static void sql_get_original_byte(xsql::FunctionContext& ctx, int argc, xsql::Fu 
ctx.result_int(static_cast(get_original_byte(ea))); } -// ============================================================================ -// Name Functions -// ============================================================================ - -// name_at(address) - Get name at address -static void sql_name_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("name_at requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - qstring name; - if (get_name(&name, ea) > 0 && !name.empty()) { - ctx.result_text(name.c_str()); - } else { - ctx.result_null(); - } -} - -// func_at(address) - Get function name containing address -static void sql_func_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("func_at requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - func_t* func = get_func(ea); - if (func) { - qstring name; - if (get_func_name(&name, func->start_ea) > 0) { - ctx.result_text(name.c_str()); - return; - } - } - ctx.result_null(); -} - -// func_start(address) - Get start address of function containing address -static void sql_func_start(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("func_start requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - func_t* func = get_func(ea); - if (func) { - ctx.result_int64(func->start_ea); - } else { - ctx.result_null(); - } -} - -// func_end(address) - Get end address of function containing address -static void sql_func_end(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("func_end requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if 
(!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - func_t* func = get_func(ea); - if (func) { - ctx.result_int64(func->end_ea); - } else { - ctx.result_null(); - } -} - -// ============================================================================ -// Function Index Functions (O(1) access) -// ============================================================================ - -// func_qty() - Get total function count -static void sql_func_qty(xsql::FunctionContext& ctx, int, xsql::FunctionArg*) { - ctx.result_int64(get_func_qty()); -} - -// func_at_index(n) - Get function address at index n -static void sql_func_at_index(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("func_at_index requires 1 argument (index)"); - return; - } - - size_t idx = static_cast(argv[0].as_int64()); - size_t qty = get_func_qty(); - - if (idx >= qty) { - ctx.result_null(); - return; - } - - func_t* f = getn_func(idx); - if (f) { - ctx.result_int64(f->start_ea); - } else { - ctx.result_null(); - } -} - -// ============================================================================ -// Name Modification Functions -// ============================================================================ - -// set_name(address, name) - Set name at address -static void sql_set_name(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 2) { - ctx.result_error("set_name requires 2 arguments (address, name)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - const char* name = argv[1].as_c_str(); - - bool success = set_name(ea, name, SN_CHECK) != 0; - if (success) decompiler::invalidate_decompiler_cache(ea); - ctx.result_int(success ? 
1 : 0); -} - // type_at(address) - Get type declaration at address static void sql_type_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 1) { @@ -862,125 +725,6 @@ static void sql_call_arg_addrs(xsql::FunctionContext& ctx, int argc, xsql::Funct ctx.result_text(arr.dump()); } -// ============================================================================ -// Segment Functions -// ============================================================================ - -// segment_at(address) - Get segment name containing address -static void sql_segment_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("segment_at requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - segment_t* seg = getseg(ea); - if (seg) { - qstring name; - if (get_segm_name(&name, seg) > 0) { - ctx.result_text(name.c_str()); - return; - } - } - ctx.result_null(); -} - -// ============================================================================ -// Comment Functions -// ============================================================================ - -// comment_at(address) - Get comment at address -static void sql_comment_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("comment_at requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - qstring cmt; - if (get_cmt(&cmt, ea, false) > 0) { - ctx.result_text(cmt.c_str()); - } else if (get_cmt(&cmt, ea, true) > 0) { - // Try repeatable comment - ctx.result_text(cmt.c_str()); - } else { - ctx.result_null(); - } -} - -// set_comment(address, text) - Set comment at address -// set_comment(address, text, repeatable) - Set comment with type -static void sql_set_comment(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 2) { 
- ctx.result_error("set_comment requires 2-3 arguments (address, text, [repeatable])"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - const char* cmt = argv[1].as_c_str(); - bool repeatable = (argc >= 3) ? argv[2].as_int() != 0 : false; - - bool success = set_cmt(ea, cmt ? cmt : "", repeatable); - ctx.result_int(success ? 1 : 0); -} - -// ============================================================================ -// Cross-Reference Functions -// ============================================================================ - -// xrefs_to(address) - Get xrefs to address as JSON array -static void sql_xrefs_to(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("xrefs_to requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - - xsql::json arr = xsql::json::array(); - xrefblk_t xb; - for (bool ok = xb.first_to(ea, XREF_ALL); ok; ok = xb.next_to()) { - arr.push_back({{"from", xb.from}, {"type", static_cast(xb.type)}}); - } - - std::string str = arr.dump(); - ctx.result_text(str); -} - -// xrefs_from(address) - Get xrefs from address as JSON array -static void sql_xrefs_from(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("xrefs_from requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - - xsql::json arr = xsql::json::array(); - xrefblk_t xb; - for (bool ok = xb.first_from(ea, XREF_ALL); ok; ok = xb.next_from()) { - arr.push_back({{"to", xb.to}, {"type", static_cast(xb.type)}}); - } - - std::string str = arr.dump(); - ctx.result_text(str); -} - // ============================================================================ // Decompiler Functions (Optional - requires Hex-Rays) // 
============================================================================ @@ -1078,348 +822,43 @@ static std::string render_pseudocode(cfuncptr_t& cfunc) { lvar_name_to_idx.erase(n); } - for (size_t i = 0; i < sv.size(); i++) { - ea_t line_ea = decompiler::extract_line_ea(&*cfunc, sv[i].line); - qstring line = sv[i].line; - tag_remove(&line); - - std::string rendered_line = line.c_str(); - int decl_idx = find_decl_lvar_index(rendered_line, lvar_name_to_idx); - if (decl_idx >= 0 && rendered_line.find("[lv:") == std::string::npos) { - if (rendered_line.find("//") != std::string::npos) { - rendered_line += " [lv:" + std::to_string(decl_idx) + "]"; - } else { - rendered_line += " // [lv:" + std::to_string(decl_idx) + "]"; - } - } - - if (i > 0) result << "\n"; - char prefix[48]; - if (line_ea != 0 && line_ea != BADADDR) - qsnprintf(prefix, sizeof(prefix), "/* %a */ ", line_ea); - else - qsnprintf(prefix, sizeof(prefix), "/* */ "); - result << prefix << rendered_line; - } - return result.str(); -} - -// decompile(address) - Get decompiled pseudocode (runtime Hex-Rays detection) -// Uses decompiler::hexrays_available() set during DecompilerRegistry::register_all() -static void sql_decompile(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("decompile requires 1 argument (address)"); - return; - } - - // Check cached Hex-Rays availability (set during DecompilerRegistry::register_all) - if (!decompiler::hexrays_available()) { - ctx.result_error("Decompiler not available (requires Hex-Rays license)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - - func_t* func = get_func(ea); - if (!func) { - ctx.result_error("No function at address"); - return; - } - - hexrays_failure_t hf; - cfuncptr_t cfunc = decompile(func, &hf); - if (!cfunc) { - std::string err = "Decompilation failed: " + std::string(hf.desc().c_str()); - ctx.result_error(err); - return; - } - - 
std::string str = render_pseudocode(cfunc); - ctx.result_text(str); -} - -// decompile(address, refresh) - Get decompiled pseudocode with optional cache invalidation -// When refresh=1, invalidates the cached decompilation before decompiling. -// Use after renaming functions or local variables to get fresh pseudocode. -static void sql_decompile_2(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 2) { - ctx.result_error("decompile requires 2 arguments (address, refresh)"); - return; - } - - if (!decompiler::hexrays_available()) { - ctx.result_error("Decompiler not available (requires Hex-Rays license)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - int refresh = argv[1].as_int(); - - func_t* func = get_func(ea); - if (!func) { - ctx.result_error("No function at address"); - return; - } - - if (refresh) { - mark_cfunc_dirty(func->start_ea, false); - } - - hexrays_failure_t hf; - cfuncptr_t cfunc = decompile(func, &hf); - if (!cfunc) { - std::string err = "Decompilation failed: " + std::string(hf.desc().c_str()); - ctx.result_error(err); - return; - } - - std::string str = render_pseudocode(cfunc); - ctx.result_text(str); -} - -// ============================================================================ -// Address Utility Functions -// ============================================================================ - -// next_head(address) - Get next defined head -static void sql_next_head(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("next_head requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - ea_t next = next_head(ea, BADADDR); - if (next != BADADDR) { - ctx.result_int64(next); - } else { - ctx.result_null(); - } -} - -// prev_head(address) - Get previous defined head -static void sql_prev_head(xsql::FunctionContext& ctx, 
int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("prev_head requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - ea_t prev = prev_head(ea, 0); - if (prev != BADADDR) { - ctx.result_int64(prev); - } else { - ctx.result_null(); - } -} - -// hex(value) - Format integer as hex string -static void sql_hex(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("hex requires 1 argument (value)"); - return; - } - - int64_t val = argv[0].as_int64(); - std::ostringstream result; - result << "0x" << std::hex << val; - std::string str = result.str(); - ctx.result_text(str); -} - -// ============================================================================ -// Item Query Functions -// ============================================================================ - -// item_type(address) - Get type of item at address -static void sql_item_type(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("item_type requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - flags64_t f = get_flags(ea); - - const char* type = "unknown"; - if (is_code(f)) type = "code"; - else if (is_strlit(f)) type = "string"; - else if (is_struct(f)) type = "struct"; - else if (is_align(f)) type = "align"; - else if (is_data(f)) type = "data"; - - ctx.result_text_static(type); -} - -// item_size(address) - Get size of item at address -static void sql_item_size(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("item_size requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - asize_t size = get_item_size(ea); - ctx.result_int64(size); -} - -// is_code(address) - Check if 
address is code -static void sql_is_code(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("is_code requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - ctx.result_int(is_code(get_flags(ea)) ? 1 : 0); -} - -// is_data(address) - Check if address is data -static void sql_is_data(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("is_data requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - ctx.result_int(is_data(get_flags(ea)) ? 1 : 0); -} - -// mnemonic(address) - Get instruction mnemonic -static void sql_mnemonic(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("mnemonic requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - if (!is_code(get_flags(ea))) { - ctx.result_null(); - return; - } - - qstring mnem; - print_insn_mnem(&mnem, ea); - ctx.result_text(mnem.c_str()); -} - -// operand(address, n) - Get operand text -static void sql_operand(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 2) { - ctx.result_error("operand requires 2 arguments (address, operand_num)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - int n = argv[1].as_int(); - - if (!is_code(get_flags(ea)) || n < 0 || n > 5) { - ctx.result_null(); - return; - } - - qstring op; - print_operand(&op, ea, n); - tag_remove(&op); - if (op.empty()) { - ctx.result_null(); - } else { - ctx.result_text(op.c_str()); - } -} - -// flags_at(address) - Get raw flags at address -static void sql_flags_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - 
ctx.result_error("flags_at requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - ctx.result_int64(get_flags(ea)); -} + for (size_t i = 0; i < sv.size(); i++) { + ea_t line_ea = decompiler::extract_line_ea(&*cfunc, sv[i].line); + qstring line = sv[i].line; + tag_remove(&line); -// ============================================================================ -// Instruction Decoding Functions -// ============================================================================ + std::string rendered_line = line.c_str(); + int decl_idx = find_decl_lvar_index(rendered_line, lvar_name_to_idx); + if (decl_idx >= 0 && rendered_line.find("[lv:") == std::string::npos) { + if (rendered_line.find("//") != std::string::npos) { + rendered_line += " [lv:" + std::to_string(decl_idx) + "]"; + } else { + rendered_line += " // [lv:" + std::to_string(decl_idx) + "]"; + } + } -// Operand type names -static const char* get_optype_name(optype_t type) { - switch (type) { - case o_void: return "void"; - case o_reg: return "reg"; - case o_mem: return "mem"; - case o_phrase: return "phrase"; - case o_displ: return "displ"; - case o_imm: return "imm"; - case o_far: return "far"; - case o_near: return "near"; - default: return "idpspec"; + if (i > 0) result << "\n"; + char prefix[48]; + if (line_ea != 0 && line_ea != BADADDR) + qsnprintf(prefix, sizeof(prefix), "/* %a */ ", line_ea); + else + qsnprintf(prefix, sizeof(prefix), "/* */ "); + result << prefix << rendered_line; } + return result.str(); } -// itype(address) - Get instruction type code -static void sql_itype(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { +// decompile(address) - Get decompiled pseudocode (runtime Hex-Rays detection) +// Uses decompiler::hexrays_available() set during DecompilerRegistry::register_all() +static void sql_decompile(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 1) { - 
ctx.result_error("itype requires 1 argument (address)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - - if (!is_code(get_flags(ea))) { - ctx.result_null(); + ctx.result_error("decompile requires 1 argument (address)"); return; } - insn_t insn; - if (decode_insn(&insn, ea) > 0) { - ctx.result_int(insn.itype); - } else { - ctx.result_null(); - } -} - -// decode_insn(address) - Get full instruction info as JSON -static void sql_decode_insn(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("decode_insn requires 1 argument (address)"); + // Check cached Hex-Rays availability (set during DecompilerRegistry::register_all) + if (!decompiler::hexrays_available()) { + ctx.result_error("Decompiler not available (requires Hex-Rays license)"); return; } @@ -1428,94 +867,35 @@ static void sql_decode_insn(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - if (!is_code(get_flags(ea))) { - ctx.result_null(); + func_t* func = get_func(ea); + if (!func) { + ctx.result_error("No function at address"); return; } - insn_t insn; - int len = decode_insn(&insn, ea); - if (len <= 0) { - ctx.result_null(); + hexrays_failure_t hf; + cfuncptr_t cfunc = decompile(func, &hf); + if (!cfunc) { + std::string err = "Decompilation failed: " + std::string(hf.desc().c_str()); + ctx.result_error(err); return; } - // Get mnemonic - qstring mnem; - print_insn_mnem(&mnem, ea); - - // Build JSON using xsql::json - xsql::json result = { - {"ea", insn.ea}, - {"itype", insn.itype}, - {"size", insn.size}, - {"mnemonic", mnem.c_str()} - }; - - // Operands array - xsql::json ops = xsql::json::array(); - for (int i = 0; i < UA_MAXOP; i++) { - const op_t& op = insn.ops[i]; - if (op.type == o_void) break; - - // Get operand text - qstring op_text; - print_operand(&op_text, ea, i); - tag_remove(&op_text); - - ops.push_back({ - {"n", i}, - {"type", static_cast(op.type)}, - {"type_name", 
get_optype_name(op.type)}, - {"dtype", static_cast(op.dtype)}, - {"reg", op.reg}, - {"addr", op.addr}, - {"value", op.value}, - {"text", op_text.c_str()} // nlohmann auto-escapes - }); - } - result["operands"] = ops; - - std::string str = result.dump(); + std::string str = render_pseudocode(cfunc); ctx.result_text(str); } -// operand_type(address, n) - Get operand type -static void sql_operand_type(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { +// decompile(address, refresh) - Get decompiled pseudocode with optional cache invalidation +// When refresh=1, invalidates the cached decompilation before decompiling. +// Use after renaming functions or local variables to get fresh pseudocode. +static void sql_decompile_2(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 2) { - ctx.result_error("operand_type requires 2 arguments (address, operand_num)"); - return; - } - - ea_t ea = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { - return; - } - int n = argv[1].as_int(); - - if (!is_code(get_flags(ea)) || n < 0 || n >= UA_MAXOP) { - ctx.result_null(); - return; - } - - insn_t insn; - if (decode_insn(&insn, ea) <= 0) { - ctx.result_null(); + ctx.result_error("decompile requires 2 arguments (address, refresh)"); return; } - const op_t& op = insn.ops[n]; - if (op.type == o_void) { - ctx.result_null(); - } else { - ctx.result_text_static(get_optype_name(op.type)); - } -} - -// operand_value(address, n) - Get operand value (immediate or address) -static void sql_operand_value(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 2) { - ctx.result_error("operand_value requires 2 arguments (address, operand_num)"); + if (!decompiler::hexrays_available()) { + ctx.result_error("Decompiler not available (requires Hex-Rays license)"); return; } @@ -1523,40 +903,28 @@ static void sql_operand_value(xsql::FunctionContext& ctx, int argc, xsql::Functi if (!resolve_address_arg(ctx, argv, 0, "address", 
ea)) { return; } - int n = argv[1].as_int(); + int refresh = argv[1].as_int(); - if (!is_code(get_flags(ea)) || n < 0 || n >= UA_MAXOP) { - ctx.result_null(); + func_t* func = get_func(ea); + if (!func) { + ctx.result_error("No function at address"); return; } - insn_t insn; - if (decode_insn(&insn, ea) <= 0) { - ctx.result_null(); - return; + if (refresh) { + mark_cfunc_dirty(func->start_ea, false); } - const op_t& op = insn.ops[n]; - switch (op.type) { - case o_void: - ctx.result_null(); - break; - case o_imm: - ctx.result_int64(op.value); - break; - case o_mem: - case o_near: - case o_far: - case o_displ: - ctx.result_int64(op.addr); - break; - case o_reg: - ctx.result_int(op.reg); - break; - default: - ctx.result_int64(op.value); - break; + hexrays_failure_t hf; + cfuncptr_t cfunc = decompile(func, &hf); + if (!cfunc) { + std::string err = "Decompilation failed: " + std::string(hf.desc().c_str()); + ctx.result_error(err); + return; } + + std::string str = render_pseudocode(cfunc); + ctx.result_text(str); } // ============================================================================ @@ -1715,7 +1083,15 @@ static void sql_gen_cfg_dot_file(xsql::FunctionContext& ctx, int argc, xsql::Fun ctx.result_int(1); // Success } -inline std::string escape_sql_text(const char* in); +static std::string escape_sql_text(const char* in) { + std::string escaped; + if (!in) return escaped; + for (const char* p = in; *p; ++p) { + if (*p == '\'') escaped += "''"; + else escaped.push_back(*p); + } + return escaped; +} // gen_schema_dot(db) - Generate DOT diagram of all tables // This uses SQLite introspection to build the schema @@ -1801,135 +1177,6 @@ static void sql_gen_schema_dot(xsql::FunctionContext& ctx, int argc, xsql::Funct ctx.result_text(str); } -// ============================================================================ -// Decompiler Lvar Functions (requires Hex-Rays) -// ============================================================================ - -inline 
xsql::json lvar_rename_result_json(const decompiler::LvarRenameResult& r) { - xsql::json j = { - {"success", r.success}, - {"applied", r.applied}, - {"func_addr", r.func_addr}, - {"lvar_idx", r.lvar_idx}, - {"target_name", r.target_name}, - {"requested_name", r.requested_name}, - {"before_name", r.before_name}, - {"after_name", r.after_name}, - {"reason", r.reason.empty() ? xsql::json(nullptr) : xsql::json(r.reason)} - }; - j["warnings"] = r.warnings; - return j; -} - -inline xsql::json label_rename_result_json(const decompiler::LabelRenameResult& r) { - xsql::json j = { - {"success", r.success}, - {"applied", r.applied}, - {"func_addr", r.func_addr}, - {"label_num", r.label_num}, - {"requested_name", r.requested_name}, - {"before_name", r.before_name}, - {"after_name", r.after_name}, - {"reason", r.reason.empty() ? xsql::json(nullptr) : xsql::json(r.reason)} - }; - j["warnings"] = r.warnings; - return j; -} - -// rename_lvar(func_addr, lvar_idx, new_name) - Rename a local variable -// Uses locator-based rename_lvar_at() for precise identification by index. -// Returns JSON with result details. -static void sql_rename_lvar(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("rename_lvar requires 3 arguments (func_addr, lvar_idx, new_name)"); - return; - } - - ea_t func_addr = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { - return; - } - int lvar_idx = argv[1].as_int(); - const char* new_name = argv[2].as_c_str(); - - if (!new_name) { - ctx.result_error("Invalid name"); - return; - } - - decompiler::LvarRenameResult result = decompiler::rename_lvar_at_ex(func_addr, lvar_idx, new_name); - std::string out = lvar_rename_result_json(result).dump(); - ctx.result_text(out); -} - -// rename_lvar_by_name(func_addr, old_name, new_name) - Rename local variable by current name. -// Returns JSON with success/applied details, explicit reasons, and warnings. 
-static void sql_rename_lvar_by_name(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("rename_lvar_by_name requires 3 arguments (func_addr, old_name, new_name)"); - return; - } - - ea_t func_addr = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { - return; - } - const char* old_name = argv[1].as_c_str(); - const char* new_name = argv[2].as_c_str(); - - if (!old_name || !new_name) { - ctx.result_error("Invalid name"); - return; - } - - decompiler::LvarRenameResult result = decompiler::rename_lvar_by_name_ex(func_addr, old_name, new_name); - std::string out = lvar_rename_result_json(result).dump(); - ctx.result_text(out); -} - -// rename_label(func_addr, label_num, new_name) - Rename a decompiler label. -// Returns JSON with success/applied details, explicit reasons, and warnings. -static void sql_rename_label(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("rename_label requires 3 arguments (func_addr, label_num, new_name)"); - return; - } - - ea_t func_addr = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { - return; - } - int label_num = argv[1].as_int(); - const char* new_name = argv[2].as_c_str(); - if (!new_name) { - ctx.result_error("Invalid label name"); - return; - } - - decompiler::LabelRenameResult result = decompiler::rename_label_ex(func_addr, label_num, new_name); - std::string out = label_rename_result_json(result).dump(); - ctx.result_text(out); -} - -// set_lvar_comment(func_addr, lvar_idx, comment) - Set local variable comment by index. -// Returns 1 on success, 0 on failure. 
-static void sql_set_lvar_comment(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("set_lvar_comment requires 3 arguments (func_addr, lvar_idx, comment)"); - return; - } - - ea_t func_addr = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { - return; - } - int lvar_idx = argv[1].as_int(); - const char* comment = argv[2].is_null() ? "" : argv[2].as_c_str(); - - bool ok = decompiler::set_lvar_comment_at(func_addr, lvar_idx, comment ? comment : ""); - ctx.result_int(ok ? 1 : 0); -} - // trim_copy is now in using idasql::trim_copy; @@ -3128,142 +2375,6 @@ static void sql_get_numform_ea_expr(xsql::FunctionContext& ctx, int argc, xsql:: ctx.result_text(numform_to_json(item_ea, opnum, nf)); } -// list_lvars(func_addr) - List local variables for a function as JSON -static void sql_list_lvars(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("list_lvars requires 1 argument (func_addr)"); - return; - } - - ea_t func_addr = BADADDR; - if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { - return; - } - - // Check cached Hex-Rays availability - if (!decompiler::hexrays_available()) { - ctx.result_error("Hex-Rays not available"); - return; - } - - func_t* f = get_func(func_addr); - if (!f) { - ctx.result_error("Function not found"); - return; - } - - hexrays_failure_t hf; - cfuncptr_t cfunc = decompile(f, &hf); - if (!cfunc) { - std::string err = "Decompilation failed: " + std::string(hf.str.c_str()); - ctx.result_error(err); - return; - } - - lvars_t* lvars = cfunc->get_lvars(); - if (!lvars) { - ctx.result_text_static("[]"); - return; - } - - xsql::json arr = xsql::json::array(); - for (size_t i = 0; i < lvars->size(); i++) { - const lvar_t& lv = (*lvars)[i]; - std::string lv_name = lv.name.c_str(); - - qstring type_str; - lv.type().print(&type_str); - - arr.push_back({ - {"idx", i}, - {"name", lv_name}, - {"type", type_str.c_str()}, 
- {"comment", lv.cmt.c_str()}, - {"size", lv.width}, - {"is_arg", lv.is_arg_var()}, - {"is_result", lv.is_result_var()}, - {"is_user_nameable", !lv_name.empty()}, - {"displayed_in_pseudocode", !lv_name.empty()}, - {"can_rename", !lv_name.empty()} - }); - } - - std::string str = arr.dump(); - ctx.result_text(str); -} - -// ============================================================================ -// Grep Search Function (unified entity search) -// ============================================================================ - -inline std::string escape_sql_text(const char* in) { - std::string escaped; - if (!in) return escaped; - for (const char* p = in; *p; ++p) { - if (*p == '\'') escaped += "''"; - else escaped.push_back(*p); - } - return escaped; -} - -// grep(pattern [, limit [, offset]]) -> JSON array of matching entities. -// Pattern is passed to the grep virtual table's `pattern` column. -static void sql_grep(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1 || argc > 3) { - ctx.result_error("grep requires 1-3 arguments (pattern, [limit], [offset])"); - return; - } - - const char* pattern = argv[0].as_c_str(); - if (!pattern || !pattern[0]) { - ctx.result_text_static("[]"); - return; - } - - int limit = 50; - int offset = 0; - if (argc >= 2 && !argv[1].is_null()) { - limit = argv[1].as_int(); - } - if (argc >= 3 && !argv[2].is_null()) { - offset = argv[2].as_int(); - } - if (limit < 0) limit = 0; - if (limit > 10000) limit = 10000; - if (offset < 0) offset = 0; - - std::ostringstream query; - query << "SELECT name, kind, address, ordinal, parent_name, full_name " - << "FROM grep " - << "WHERE pattern = '" << escape_sql_text(pattern) << "' " - << "ORDER BY kind, name " - << "LIMIT " << limit << " OFFSET " << offset; - - xsql::json arr = xsql::json::array(); - std::string err; - if (!ctx.query_each(query.str(), [&](const xsql::QueryRow& row) { - std::string name = row.is_null(0) ? 
"" : row.text(0); - std::string kind = row.is_null(1) ? "" : row.text(1); - - xsql::json obj = { - {"name", name}, - {"kind", kind}, - {"full_name", row.is_null(5) ? "" : row.text(5)} - }; - - obj["address"] = row.is_null(2) ? xsql::json(nullptr) : xsql::json(row.int64_value(2)); - obj["ordinal"] = row.is_null(3) ? xsql::json(nullptr) : xsql::json(row.int_value(3)); - obj["parent_name"] = row.is_null(4) ? xsql::json(nullptr) : xsql::json(row.text(4)); - arr.push_back(obj); - }, &err)) { - std::string err_msg = "grep query error: " + err; - ctx.result_error(err_msg); - return; - } - - ctx.result_text(arr.dump()); -} - // ============================================================================ // IDAPython Execution Functions // ============================================================================ @@ -3399,11 +2510,6 @@ static void sql_rebuild_strings(xsql::FunctionContext& ctx, int argc, xsql::Func ctx.result_int64(static_cast(count)); } -// string_count() - Get current count of strings in IDA's cached list (no rebuild) -static void sql_string_count(xsql::FunctionContext& ctx, int /*argc*/, xsql::FunctionArg* /*argv*/) { - ctx.result_int64(static_cast(get_strlist_qty())); -} - // ============================================================================ // Database Persistence // ============================================================================ @@ -3444,32 +2550,11 @@ void register_sql_functions(xsql::Database& db) { db.register_function("revert_byte", 1, xsql::ScalarFn(sql_revert_byte)); db.register_function("get_original_byte", 1, xsql::ScalarFn(sql_get_original_byte)); - // Names - db.register_function("name_at", 1, xsql::ScalarFn(sql_name_at)); - db.register_function("func_at", 1, xsql::ScalarFn(sql_func_at)); - db.register_function("func_start", 1, xsql::ScalarFn(sql_func_start)); - db.register_function("func_end", 1, xsql::ScalarFn(sql_func_end)); - db.register_function("set_name", 2, xsql::ScalarFn(sql_set_name)); + // Names and types 
db.register_function("type_at", 1, xsql::ScalarFn(sql_type_at)); db.register_function("set_type", 2, xsql::ScalarFn(sql_set_type)); db.register_function("parse_decls", 1, xsql::ScalarFn(sql_parse_decls)); - // Function index (O(1) access) - db.register_function("func_qty", 0, xsql::ScalarFn(sql_func_qty)); - db.register_function("func_at_index", 1, xsql::ScalarFn(sql_func_at_index)); - - // Segments - db.register_function("segment_at", 1, xsql::ScalarFn(sql_segment_at)); - - // Comments - db.register_function("comment_at", 1, xsql::ScalarFn(sql_comment_at)); - db.register_function("set_comment", 2, xsql::ScalarFn(sql_set_comment)); - db.register_function("set_comment", 3, xsql::ScalarFn(sql_set_comment)); - - // Cross-references - db.register_function("xrefs_to", 1, xsql::ScalarFn(sql_xrefs_to)); - db.register_function("xrefs_from", 1, xsql::ScalarFn(sql_xrefs_from)); - // Decompiler (only registered if Hex-Rays is available) if (decompiler::hexrays_available()) { db.register_function("decompile", 1, xsql::ScalarFn(sql_decompile)); @@ -3477,11 +2562,6 @@ void register_sql_functions(xsql::Database& db) { db.register_function("apply_callee_type", 2, xsql::ScalarFn(sql_apply_callee_type)); db.register_function("callee_type_at", 1, xsql::ScalarFn(sql_callee_type_at)); db.register_function("call_arg_addrs", 1, xsql::ScalarFn(sql_call_arg_addrs)); - db.register_function("list_lvars", 1, xsql::ScalarFn(sql_list_lvars)); - db.register_function("rename_lvar", 3, xsql::ScalarFn(sql_rename_lvar)); - db.register_function("rename_lvar_by_name", 3, xsql::ScalarFn(sql_rename_lvar_by_name)); - db.register_function("rename_label", 3, xsql::ScalarFn(sql_rename_label)); - db.register_function("set_lvar_comment", 3, xsql::ScalarFn(sql_set_lvar_comment)); db.register_function("set_union_selection", 3, xsql::ScalarFn(sql_set_union_selection)); db.register_function("set_union_selection_item", 3, xsql::ScalarFn(sql_set_union_selection_item)); 
db.register_function("set_union_selection_ea_arg", 4, xsql::ScalarFn(sql_set_union_selection_ea_arg)); @@ -3517,26 +2597,6 @@ void register_sql_functions(xsql::Database& db) { db.register_function("get_numform_ea_expr", 5, xsql::ScalarFn(sql_get_numform_ea_expr)); } - // Address utilities - db.register_function("next_head", 1, xsql::ScalarFn(sql_next_head)); - db.register_function("prev_head", 1, xsql::ScalarFn(sql_prev_head)); - db.register_function("hex", 1, xsql::ScalarFn(sql_hex)); - - // Item query functions - db.register_function("item_type", 1, xsql::ScalarFn(sql_item_type)); - db.register_function("item_size", 1, xsql::ScalarFn(sql_item_size)); - db.register_function("is_code", 1, xsql::ScalarFn(sql_is_code)); - db.register_function("is_data", 1, xsql::ScalarFn(sql_is_data)); - db.register_function("mnemonic", 1, xsql::ScalarFn(sql_mnemonic)); - db.register_function("operand", 2, xsql::ScalarFn(sql_operand)); - db.register_function("flags_at", 1, xsql::ScalarFn(sql_flags_at)); - - // Instruction decoding - db.register_function("itype", 1, xsql::ScalarFn(sql_itype)); - db.register_function("decode_insn", 1, xsql::ScalarFn(sql_decode_insn)); - db.register_function("operand_type", 2, xsql::ScalarFn(sql_operand_type)); - db.register_function("operand_value", 2, xsql::ScalarFn(sql_operand_value)); - // File generation db.register_function("gen_listing", 1, xsql::ScalarFn(sql_gen_listing)); @@ -3545,11 +2605,6 @@ void register_sql_functions(xsql::Database& db) { db.register_function("gen_cfg_dot_file", 2, xsql::ScalarFn(sql_gen_cfg_dot_file)); db.register_function("gen_schema_dot", 0, xsql::ScalarFn(sql_gen_schema_dot)); - // Grep search - db.register_function("grep", 1, xsql::ScalarFn(sql_grep)); - db.register_function("grep", 2, xsql::ScalarFn(sql_grep)); - db.register_function("grep", 3, xsql::ScalarFn(sql_grep)); - // Python execution db.register_function("idapython_snippet", 1, xsql::ScalarFn(sql_idapython_snippet)); db.register_function("idapython_snippet", 
2, xsql::ScalarFn(sql_idapython_snippet)); @@ -3560,7 +2615,6 @@ void register_sql_functions(xsql::Database& db) { db.register_function("rebuild_strings", 0, xsql::ScalarFn(sql_rebuild_strings)); db.register_function("rebuild_strings", 1, xsql::ScalarFn(sql_rebuild_strings)); db.register_function("rebuild_strings", 2, xsql::ScalarFn(sql_rebuild_strings)); - db.register_function("string_count", 0, xsql::ScalarFn(sql_string_count)); // Database persistence db.register_function("save_database", 0, xsql::ScalarFn(sql_save_database)); diff --git a/src/lib/src/metadata_welcome.cpp b/src/lib/src/metadata_welcome.cpp index 36d92b0..3499b90 100644 --- a/src/lib/src/metadata_welcome.cpp +++ b/src/lib/src/metadata_welcome.cpp @@ -53,6 +53,7 @@ static void collect_welcome(std::vector& rows) { row.funcs_count = static_cast(get_func_qty()); row.segments_count = static_cast(get_segm_qty()); row.names_count = static_cast(get_nlist_size()); + row.strings_count = static_cast(get_strlist_qty()); std::ostringstream summary; summary << row.processor << " " << (row.is_64bit ? 
"64-bit" : "32-bit"); @@ -63,6 +64,7 @@ static void collect_welcome(std::vector& rows) { } summary << " | funcs: " << row.funcs_count; summary << " | segs: " << row.segments_count; + summary << " | strings: " << row.strings_count; row.summary = summary.str(); rows.push_back(std::move(row)); @@ -87,6 +89,7 @@ CachedTableDef define_welcome() { .column_int("funcs_count", [](const WelcomeRow& row) -> int { return row.funcs_count; }) .column_int("segments_count", [](const WelcomeRow& row) -> int { return row.segments_count; }) .column_int("names_count", [](const WelcomeRow& row) -> int { return row.names_count; }) + .column_int("strings_count", [](const WelcomeRow& row) -> int { return row.strings_count; }) .build(); } diff --git a/src/lib/src/metadata_welcome.hpp b/src/lib/src/metadata_welcome.hpp index 7e51cac..647a679 100644 --- a/src/lib/src/metadata_welcome.hpp +++ b/src/lib/src/metadata_welcome.hpp @@ -27,6 +27,7 @@ struct WelcomeRow { int funcs_count = 0; int segments_count = 0; int names_count = 0; + int strings_count = 0; }; CachedTableDef define_welcome(); diff --git a/src/lib/src/search_bytes.cpp b/src/lib/src/search_bytes.cpp index 4e2b09e..06a8170 100644 --- a/src/lib/src/search_bytes.cpp +++ b/src/lib/src/search_bytes.cpp @@ -7,196 +7,246 @@ #include "search_bytes.hpp" -#include +#include #include +#include +#include +#include namespace idasql { namespace search { -size_t find_byte_pattern( - const char* pattern, - ea_t start_ea, - ea_t end_ea, - std::vector& results, - size_t max_results) -{ - if (!pattern || !*pattern) return 0; - - compiled_binpat_vec_t binpat; - qstring errbuf; - - if (!parse_binpat_str(&binpat, start_ea, pattern, 16, PBSENC_DEF1BPU, &errbuf)) { - return 0; +namespace { + +constexpr int BYTE_SEARCH_ADDRESS = 0; +constexpr int BYTE_SEARCH_MATCHED_HEX = 1; +constexpr int BYTE_SEARCH_MATCHED_BYTES = 2; +constexpr int BYTE_SEARCH_SIZE = 3; +constexpr int BYTE_SEARCH_PATTERN = 4; +constexpr int BYTE_SEARCH_START_EA = 5; +constexpr int 
BYTE_SEARCH_END_EA = 6; +constexpr int BYTE_SEARCH_MAX_RESULTS = 7; + +std::string format_matched_hex(const std::vector& bytes) { + std::ostringstream hex; + hex << std::hex << std::setfill('0'); + for (size_t i = 0; i < bytes.size(); i++) { + if (i > 0) hex << " "; + hex << std::setw(2) << static_cast(bytes[i]); } + return hex.str(); +} - if (binpat.empty()) return 0; - - size_t pattern_len = binpat[0].bytes.size(); +void fill_match_result(ByteSearchResult& result, ea_t address, size_t pattern_len) { + result.address = address; + result.matched_bytes.resize(pattern_len); + for (size_t i = 0; i < pattern_len; i++) { + result.matched_bytes[i] = get_byte(address + i); + } + result.matched_hex = format_matched_hex(result.matched_bytes); +} - ea_t ea = start_ea; - size_t count = 0; +ea_t saturating_next_ea(ea_t ea) { + const ea_t max_ea = std::numeric_limits::max(); + if (ea >= max_ea) return max_ea; + return ea + 1; +} - while (ea < end_ea) { - ea_t found = bin_search(ea, end_ea, binpat, BIN_SEARCH_FORWARD); - if (found == BADADDR) break; +ea_t as_ea(const xsql::FunctionArg& value) { + return static_cast(value.as_int64()); +} - ByteSearchResult result; - result.address = found; +size_t as_size(const xsql::FunctionArg& value) { + const int64_t v = value.as_int64(); + return v > 0 ? 
static_cast(v) : 0; +} - result.matched_bytes.resize(pattern_len); - for (size_t i = 0; i < pattern_len; i++) { - result.matched_bytes[i] = get_byte(found + i); +class EmptyByteSearchGenerator final : public xsql::Generator { +public: + bool next() override { return false; } + const ByteSearchResult& current() const override { return current_; } + int64_t rowid() const override { return 0; } + +private: + ByteSearchResult current_{}; +}; + +class ByteSearchGenerator final : public xsql::Generator { +public: + ByteSearchGenerator(std::string pattern, ea_t start_ea, ea_t end_ea, size_t max_results) + : start_ea_(start_ea), + end_ea_(end_ea), + next_ea_(start_ea), + max_results_(max_results) + { + if (pattern.empty() || end_ea_ <= start_ea_) { + return; } - std::ostringstream hex; - hex << std::hex << std::setfill('0'); - for (size_t i = 0; i < pattern_len; i++) { - if (i > 0) hex << " "; - hex << std::setw(2) << static_cast(result.matched_bytes[i]); + qstring errbuf; + if (!parse_binpat_str(&binpat_, start_ea_, pattern.c_str(), 16, PBSENC_DEF1BPU, &errbuf)) { + return; } - result.matched_hex = hex.str(); - results.push_back(std::move(result)); - count++; - - if (max_results > 0 && count >= max_results) break; + if (binpat_.empty()) { + return; + } - ea = found + 1; + pattern_len_ = binpat_[0].bytes.size(); + valid_ = pattern_len_ > 0; } - return count; -} + bool next() override { + if (!valid_) return false; + if (max_results_ > 0 && emitted_ >= max_results_) return false; + if (next_ea_ >= end_ea_) return false; -ea_t find_first_pattern(const char* pattern, ea_t start_ea, ea_t end_ea) { - if (!pattern || !*pattern) return BADADDR; + ea_t found = bin_search(next_ea_, end_ea_, binpat_, BIN_SEARCH_FORWARD); + if (found == BADADDR) return false; - compiled_binpat_vec_t binpat; - qstring errbuf; - - if (!parse_binpat_str(&binpat, start_ea, pattern, 16, PBSENC_DEF1BPU, &errbuf)) { - return BADADDR; + fill_match_result(current_, found, pattern_len_); + next_ea_ = 
saturating_next_ea(found); + emitted_++; + rowid_++; + return true; } - if (binpat.empty()) return BADADDR; - - return bin_search(start_ea, end_ea, binpat, BIN_SEARCH_FORWARD); -} - -// ============================================================================ -// SQL Function Implementations -// ============================================================================ - -static void sql_search_bytes_1(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("search_bytes requires pattern argument"); - return; + const ByteSearchResult& current() const override { + return current_; } - const char* pattern = argv[0].as_c_str(); - if (!pattern) { - ctx.result_error("Invalid pattern"); - return; + int64_t rowid() const override { + return rowid_; } +private: + compiled_binpat_vec_t binpat_; + ea_t start_ea_ = BADADDR; + ea_t end_ea_ = BADADDR; + ea_t next_ea_ = BADADDR; + size_t pattern_len_ = 0; + size_t max_results_ = 0; + size_t emitted_ = 0; + int64_t rowid_ = 0; + bool valid_ = false; + ByteSearchResult current_{}; +}; + +std::unique_ptr> make_byte_search_generator( + const std::vector& args) +{ + std::string pattern; ea_t start_ea = inf_get_min_ea(); ea_t end_ea = inf_get_max_ea(); - - std::vector results; - find_byte_pattern(pattern, start_ea, end_ea, results); - - xsql::json arr = xsql::json::array(); - for (const auto& r : results) { - arr.push_back({ - {"address", r.address}, - {"matched_hex", r.matched_hex}, - {"size", r.matched_bytes.size()} - }); - } - - std::string result = arr.dump(); - ctx.result_text(result); -} - -static void sql_search_bytes_3(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("search_bytes requires (pattern, start, end) arguments"); - return; - } - - const char* pattern = argv[0].as_c_str(); - if (!pattern) { - ctx.result_error("Invalid pattern"); - return; + size_t max_results = 0; + + for (const auto& arg : args) { + switch 
(arg.column_index) { + case BYTE_SEARCH_PATTERN: + if (arg.op == xsql::ConstraintOp::Eq) { + const char* text = arg.value.as_c_str(); + pattern = text ? text : ""; + } + break; + case BYTE_SEARCH_START_EA: + if (arg.op == xsql::ConstraintOp::Eq) { + start_ea = std::max(start_ea, as_ea(arg.value)); + } + break; + case BYTE_SEARCH_END_EA: + if (arg.op == xsql::ConstraintOp::Eq) { + end_ea = std::min(end_ea, as_ea(arg.value)); + } + break; + case BYTE_SEARCH_MAX_RESULTS: + if (arg.op == xsql::ConstraintOp::Eq) { + max_results = as_size(arg.value); + } + break; + case BYTE_SEARCH_ADDRESS: + if (arg.op == xsql::ConstraintOp::Ge) { + start_ea = std::max(start_ea, as_ea(arg.value)); + } else if (arg.op == xsql::ConstraintOp::Gt) { + start_ea = std::max(start_ea, saturating_next_ea(as_ea(arg.value))); + } else if (arg.op == xsql::ConstraintOp::Lt) { + end_ea = std::min(end_ea, as_ea(arg.value)); + } else if (arg.op == xsql::ConstraintOp::Le) { + end_ea = std::min(end_ea, saturating_next_ea(as_ea(arg.value))); + } + break; + default: + break; + } } - ea_t start_ea = static_cast(argv[1].as_int64()); - ea_t end_ea = static_cast(argv[2].as_int64()); - - std::vector results; - find_byte_pattern(pattern, start_ea, end_ea, results); - - xsql::json arr = xsql::json::array(); - for (const auto& r : results) { - arr.push_back({ - {"address", r.address}, - {"matched_hex", r.matched_hex}, - {"size", r.matched_bytes.size()} - }); + if (pattern.empty()) { + return std::make_unique(); } - std::string result = arr.dump(); - ctx.result_text(result); + return std::make_unique(std::move(pattern), start_ea, end_ea, max_results); } -static void sql_search_first_1(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("search_first requires pattern argument"); - return; - } +} // namespace - const char* pattern = argv[0].as_c_str(); - if (!pattern) { - ctx.result_error("Invalid pattern"); - return; - } +size_t find_byte_pattern( + const char* 
pattern, + ea_t start_ea, + ea_t end_ea, + std::vector& results, + size_t max_results) +{ + results.clear(); - ea_t result = find_first_pattern(pattern, inf_get_min_ea(), inf_get_max_ea()); - if (result != BADADDR) { - ctx.result_int64(static_cast(result)); - } else { - ctx.result_null(); + ByteSearchGenerator generator(pattern ? pattern : "", start_ea, end_ea, max_results); + while (generator.next()) { + results.push_back(generator.current()); } + return results.size(); } -static void sql_search_first_3(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("search_first requires (pattern, start, end) arguments"); - return; - } - - const char* pattern = argv[0].as_c_str(); - if (!pattern) { - ctx.result_error("Invalid pattern"); - return; - } - - ea_t start_ea = static_cast(argv[1].as_int64()); - ea_t end_ea = static_cast(argv[2].as_int64()); - - ea_t result = find_first_pattern(pattern, start_ea, end_ea); - if (result != BADADDR) { - ctx.result_int64(static_cast(result)); - } else { - ctx.result_null(); - } +xsql::GeneratorTableDef define_byte_search() { + return xsql::generator_table("byte_search") + .column_int64("address", [](const ByteSearchResult& row) { + return static_cast(row.address); + }) + .column_text("matched_hex", [](const ByteSearchResult& row) { + return row.matched_hex; + }) + .column_blob("matched_bytes", [](const ByteSearchResult& row) { + return std::vector(row.matched_bytes.begin(), row.matched_bytes.end()); + }) + .column_int("size", [](const ByteSearchResult& row) { + return static_cast(row.matched_bytes.size()); + }) + .hidden_column_text("pattern") + .hidden_column_int64("start_ea") + .hidden_column_int64("end_ea") + .hidden_column_int("max_results") + .full_scan_error( + "byte_search requires WHERE pattern = ''; " + "matched_hex is an output column, not the search input") + .constraint_filter( + { + xsql::required_eq("pattern", "byte_search requires WHERE pattern = ''"), + 
xsql::optional_eq("start_ea"), + xsql::optional_eq("end_ea"), + xsql::optional_eq("max_results"), + xsql::optional_ge("address"), + xsql::optional_gt("address"), + xsql::optional_lt("address"), + xsql::optional_le("address"), + }, + make_byte_search_generator, + 1.0, + 100.0) + .order_by_consumed("address") + .build(); } -bool register_search_bytes(xsql::Database& db) { - db.register_function("search_bytes", 1, xsql::ScalarFn(sql_search_bytes_1)); - db.register_function("search_bytes", 3, xsql::ScalarFn(sql_search_bytes_3)); - db.register_function("search_first", 1, xsql::ScalarFn(sql_search_first_1)); - db.register_function("search_first", 3, xsql::ScalarFn(sql_search_first_3)); - return true; +bool register_byte_search(xsql::Database& db) { + static auto byte_search = define_byte_search(); + return db.register_generator_table("ida_byte_search", &byte_search) && + db.create_table("byte_search", "ida_byte_search"); } } // namespace search diff --git a/src/lib/src/search_bytes.hpp b/src/lib/src/search_bytes.hpp index 6341f86..18ddf82 100644 --- a/src/lib/src/search_bytes.hpp +++ b/src/lib/src/search_bytes.hpp @@ -6,7 +6,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. /** - * search_bytes.hpp - Binary pattern search functions for IDASQL + * search_bytes.hpp - Binary pattern search table for IDASQL */ #pragma once @@ -14,8 +14,8 @@ #include #include -#include -#include +#include + #include #include @@ -37,9 +37,9 @@ size_t find_byte_pattern( std::vector& results, size_t max_results = 0); -ea_t find_first_pattern(const char* pattern, ea_t start_ea, ea_t end_ea); +xsql::GeneratorTableDef define_byte_search(); -bool register_search_bytes(xsql::Database& db); +bool register_byte_search(xsql::Database& db); } // namespace search } // namespace idasql