From 991bf85aea6bdf22b26b059c7aa7e13d7f8916f2 Mon Sep 17 00:00:00 2001 From: Sergei Patrikeev Date: Mon, 19 Jan 2026 20:12:58 +0000 Subject: [PATCH 1/2] feat: add idl-history command for extracting historical Anchor IDL versions Adds support for fetching and storing historical IDL versions for Solana programs using Anchor-style IDL accounts. ## Key Features - **IDL Account Discovery**: Derive Anchor IDL account addresses using the canonical `anchor:idl` + program_id seed pattern - **Version Detection**: Identify IDL versions through Create and SetBuffer instructions (intermediate Writes are part of sequences, not versions) - **IDL Reconstruction**: Reassemble IDL content from Write instruction history, handling the append-only write pattern - **Buffer Handling**: Track SetBuffer instructions and extract buffer addresses for buffer-based IDL updates - **Decompression**: Handle zlib-compressed IDL data with proper header parsing (8-byte header: 4-byte length + 4-byte discriminator) - **Storage**: Save versions as {slot}.json in output directory ## Implementation Details The extraction algorithm handles two distinct update patterns: 1. **Create Operations**: IDL Writes occur at slots >= create_slot and < next_version_slot (Writes come AFTER Create) 2. 
**SetBuffer Operations**: IDL content is fetched from the buffer account set during the transaction Key correctness fixes included: - Only Create and SetBuffer mark version boundaries (not Writes) - Verify instruction variant before extracting buffer addresses - Use position-based tracking instead of slot-only for proper ordering ## Module Structure Organized into `src/core/idl/` directory: - address.rs: IDL account address derivation - buffer.rs: SetBuffer operation handling - constants.rs: Header sizes, discriminators, variants - decompression.rs: Zlib decompression utilities - events.rs: Event collection and version tracking - extraction.rs: Main IDL extraction orchestrator - instruction.rs: Instruction parsing and classification - types.rs: Type definitions (IdlModificationType, IdlEvent, etc.) - writes.rs: Write instruction handling and reconstruction ## New Files - src/commands/idl_history.rs: Command implementation - src/core/idl/*.rs: IDL extraction logic modules - docs/ANCHOR_IDL_INTERNALS.md: Technical documentation --- .gitignore | 1 + Cargo.lock | 2 + Cargo.toml | 2 + README.md | 15 + docs/ANCHOR_IDL_INTERNALS.md | 631 ++++++++++++++++++++++++++++++++++ src/commands/history.rs | 17 +- src/commands/idl_history.rs | 128 +++++++ src/commands/mod.rs | 2 + src/core/helius.rs | 27 +- src/core/idl/address.rs | 13 + src/core/idl/buffer.rs | 62 ++++ src/core/idl/constants.rs | 24 ++ src/core/idl/decompression.rs | 62 ++++ src/core/idl/events.rs | 155 +++++++++ src/core/idl/extraction.rs | 174 ++++++++++ src/core/idl/instruction.rs | 48 +++ src/core/idl/mod.rs | 19 + src/core/idl/types.rs | 42 +++ src/core/idl/writes.rs | 119 +++++++ src/core/mod.rs | 1 + src/core/types.rs | 12 +- src/core/upgrades.rs | 10 +- src/main.rs | 24 +- 23 files changed, 1568 insertions(+), 22 deletions(-) create mode 100644 docs/ANCHOR_IDL_INTERNALS.md create mode 100644 src/commands/idl_history.rs create mode 100644 src/core/idl/address.rs create mode 100644 src/core/idl/buffer.rs 
create mode 100644 src/core/idl/constants.rs create mode 100644 src/core/idl/decompression.rs create mode 100644 src/core/idl/events.rs create mode 100644 src/core/idl/extraction.rs create mode 100644 src/core/idl/instruction.rs create mode 100644 src/core/idl/mod.rs create mode 100644 src/core/idl/types.rs create mode 100644 src/core/idl/writes.rs diff --git a/.gitignore b/.gitignore index ec1ccd9..ea13026 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ save.json signatures.json *.so /historical/ +/historical-idl/ .idea/ \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 845c270..36ee1fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1004,11 +1004,13 @@ dependencies = [ "dotenvy", "env_logger", "eyre", + "flate2", "governor", "itertools 0.14.0", "log", "reqwest", "serde", + "serde_json", "serde_with", "solana-account", "solana-commitment-config", diff --git a/Cargo.toml b/Cargo.toml index 69591db..7e4ff93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,5 +27,7 @@ eyre = "0.6.12" serde_with = "3.16.1" itertools = "0.14.0" governor = "0.10.4" +flate2 = "1.0" +serde_json = "1.0" diff --git a/README.md b/README.md index 2d3a8e0..f2022ce 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,21 @@ e5a8ba9d4d886cdadc74a00dd6b4de41929af779916c2e01fea0d59ae62d7e7c 382628780.so Outputs are stored under `historical//.so`. If a slot file already exists, it will be skipped. 
+### Extract historical IDL versions: + +Reconstruct and store historical Anchor IDL versions for a program: + +```bash +./target/release/gg-program-resurrect idl-history --program 6EF8rrecthR5Dkzon8Nwu78hRvfCKubJ14M5uBEwF6P --output ./idl-versions +``` + +The command: +- Derives the Anchor IDL account address using the canonical `anchor:idl` seed pattern +- Detects IDL versions through Create and SetBuffer instructions +- Reconstructs IDL content from Write instruction sequences +- Handles zlib-compressed IDL data +- Saves each version as `{output}/{program}/anchor/{slot}.json` (existing slot files are skipped) + ## License MIT. diff --git a/docs/ANCHOR_IDL_INTERNALS.md b/docs/ANCHOR_IDL_INTERNALS.md new file mode 100644 index 0000000..1664125 --- /dev/null +++ b/docs/ANCHOR_IDL_INTERNALS.md @@ -0,0 +1,631 @@ +# Anchor IDL Storage and Update Internals + +This document explains how Anchor programs store and update their IDL (Interface Definition Language) on-chain. Understanding these internals is essential for building tools that extract historical IDL versions. + +## Table of Contents + +1. [IDL Account Address Derivation](#idl-account-address-derivation) +2. [IDL Account Structure](#idl-account-structure) +3. [IDL Instructions](#idl-instructions) +4. [Instruction Binary Format](#instruction-binary-format) +5. [Update Patterns](#update-patterns) +6. [Understanding IDL Versions](#understanding-idl-versions) +7. [Compression](#compression) +8. [Historical IDL Extraction](#historical-idl-extraction) +9. 
[Common Pitfalls](#common-pitfalls) + +--- + +## IDL Account Address Derivation + +Anchor derives the IDL account address deterministically from the program ID using a two-step process: + +```rust +// Step 1: Find PDA with empty seeds +let (base, _bump) = Pubkey::find_program_address(&[], &program_id); + +// Step 2: Create address with seed "anchor:idl" +let idl_address = Pubkey::create_with_seed(&base, "anchor:idl", &program_id)?; +``` + +**Key points:** +- The IDL account is NOT a PDA - it's created via `create_with_seed` +- The base address IS a PDA (derived with empty seeds from the program) +- This means the IDL account address is fully deterministic given a program ID + +**Example:** +``` +Program ID: JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4 +IDL Account: C88XWfp26heEmDkmfSzeXP7Fd7GQJ2j9dDTUsyiZbUTa +``` + +--- + +## IDL Account Structure + +The IDL account has a 44-byte header followed by compressed IDL data: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Offset │ Size │ Field │ Description │ +├────────┼────────┼──────────────┼────────────────────────────┤ +│ 0 │ 8 │ discriminator│ Account type identifier │ +│ 8 │ 32 │ authority │ Pubkey that can modify IDL │ +│ 40 │ 4 │ data_len │ Length of compressed data │ +│ 44 │ N │ data │ Zlib-compressed IDL JSON │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Total header size: 44 bytes** + +The `data_len` field is a little-endian `u32` that specifies exactly how many bytes of compressed data follow the header. + +--- + +## IDL Instructions + +Anchor defines a set of instructions for managing IDL accounts. These are NOT part of the program's own instruction set - they're built into the Anchor framework and dispatched separately using a special prefix tag. 
+ +### Instruction Variants + +| Variant | Value | Description | +|---------|-------|-------------| +| `Create` | 0 | Create a new IDL account (initially empty) | +| `CreateBuffer` | 1 | Create a temporary buffer account for staging | +| `Write` | 2 | Append data to an account (IDL or buffer) | +| `SetBuffer` | 3 | Atomically copy buffer contents to IDL account | +| `SetAuthority` | 4 | Transfer IDL authority to a new pubkey | +| `Close` | 5 | Close the IDL account and recover rent | +| `Resize` | 6 | Resize the IDL account (increase capacity) | + +### Instruction Details + +#### Create (variant 0) + +Creates a new IDL account. **Important:** The Create instruction only allocates the account - it does NOT populate it with IDL data. The data is written via subsequent `Write` instructions. + +**Accounts:** +1. `payer` - Pays for account creation +2. `idl` - The IDL account to create +3. `authority` - Will become the IDL authority +4. `system_program` + +**Timeline of a Create operation:** +``` +Slot N: Create → Empty IDL account created +Slot N+1: Write(1) → First chunk appended +Slot N+2: Write(2) → Second chunk appended +... +Slot N+K: Write(K) → Final chunk appended, IDL now complete +``` + +#### CreateBuffer (variant 1) + +Creates a temporary buffer account for staging IDL updates. Buffers are used when you want atomic updates to an existing IDL. + +**Accounts:** +1. `payer` - Pays for account creation +2. `buffer` - The buffer account to create +3. `authority` - Buffer authority +4. `system_program` + +**Important:** Buffer accounts can be **reused** across multiple IDL updates. A single buffer address may have thousands of transactions spanning many different IDL versions. + +#### Write (variant 2) + +**Appends** data to an IDL or buffer account. This is an **append-only** operation - there is no offset parameter. + +**Accounts:** +1. `target` - IDL or buffer account to write to +2. 
`authority` - Must sign + +**Data payload:** +- `Vec<u8>` - Bytes to append (Borsh-encoded: 4-byte length prefix + raw bytes) + +**Critical insight:** Each `Write` instruction appends its payload to the **end** of the existing data. The target account maintains a write pointer that advances with each Write. To reconstruct the full data, you must concatenate all Write payloads in **chronological order** (oldest slot first). + +#### SetBuffer (variant 3) + +Atomically replaces the IDL account's data with the buffer's contents. This is the key instruction that marks a new IDL "version" when using the buffer pattern. + +**Accounts:** +1. `buffer` - Source buffer account (index 0) +2. `idl` - Target IDL account (index 1) +3. `authority` - Must sign + +After this instruction, the buffer is typically closed (but not always - buffers can be reused). + +#### SetAuthority (variant 4) + +Transfers IDL authority to a new pubkey. + +**Accounts:** +1. `idl` - The IDL account +2. `authority` - Current authority (signer) +3. `new_authority` - New authority pubkey + +#### Close (variant 5) + +Closes the IDL account and returns rent to a recipient. + +**Accounts:** +1. `idl` - Account to close +2. `authority` - Must sign +3. `recipient` - Receives the rent + +#### Resize (variant 6) + +Increases the IDL account's capacity. + +**Accounts:** +1. `idl` - Account to resize +2. `authority` - Must sign +3. `system_program` + +--- + +## Instruction Binary Format + +Anchor IDL instructions use a special prefix to distinguish them from regular program instructions. + +### IDL Instruction Tag + +All IDL instructions are prefixed with an 8-byte tag: + +```rust +const IDL_IX_TAG: u64 = 0x0a69e9a778bcf440; +``` + +In little-endian byte order: `[0x40, 0xf4, 0xbc, 0x78, 0xa7, 0xe9, 0x69, 0x0a]` + +This tag is derived from `sha256("anchor:idl")[..8]` interpreted as a little-endian u64. 
+ +### Complete Instruction Layout + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Offset │ Size │ Field │ Description │ +├────────┼──────┼────────────┼─────────────────────────────────────┤ +│ 0 │ 8 │ IDL_IX_TAG │ 0x40f4bc78a7e9690a (LE) │ +│ 8 │ 1 │ variant │ Instruction variant (0-6) │ +│ 9 │ ... │ args │ Variant-specific arguments │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Write Instruction Example + +For a `Write` instruction with 1000 bytes of data: + +``` +Bytes 0-7: 40 f4 bc 78 a7 e9 69 0a (IDL_IX_TAG) +Byte 8: 02 (variant = Write) +Bytes 9-12: e8 03 00 00 (data length = 1000, u32 LE) +Bytes 13+: [1000 bytes of data] (the actual payload) +``` + +The payload is encoded as a Borsh `Vec<u8>`, which is a 4-byte little-endian length prefix followed by the raw bytes. + +### Parsing IDL Instructions + +```rust +const IDL_IX_TAG_LE: [u8; 8] = [0x40, 0xf4, 0xbc, 0x78, 0xa7, 0xe9, 0x69, 0x0a]; + +fn parse_idl_instruction(data: &[u8]) -> Option<(u8, &[u8])> { + if data.len() < 9 { + return None; + } + if data[..8] != IDL_IX_TAG_LE { + return None; + } + let variant = data[8]; + let args = &data[9..]; + Some((variant, args)) +} +``` + +--- + +## Update Patterns + +There are two patterns for creating/updating an IDL: + +### Pattern 1: Create + Direct Writes (Initial Creation) + +Used when creating a new IDL for the first time: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Slot │ Instruction │ Effect │ +├───────────┼─────────────┼───────────────────────────────────────┤ +│ 357165216 │ Create │ Empty IDL account created │ +│ 357165222 │ Write │ Chunk 1 appended to IDL account │ +│ 357165228 │ Write │ Chunk 2 appended │ +│ 357165258 │ Write │ Chunk 3 appended │ +│ ... │ ... │ ... 
│ +│ 357165316 │ Write │ Final chunk - IDL now complete │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Key insight:** The Create instruction marks the "version slot", but the actual IDL data comes from Write instructions at **later** slots. + +### Pattern 2: Buffer + SetBuffer (Updates) + +Used when updating an existing IDL: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Slot │ Instruction │ Effect │ +├───────────┼────────────────┼────────────────────────────────────┤ +│ 388820350 │ CreateBuffer │ Buffer account B created │ +│ 388820351 │ Write(B) │ Chunk 1 appended to buffer │ +│ 388820352 │ Write(B) │ Chunk 2 appended to buffer │ +│ 388820353 │ Write(B) │ Chunk 3 appended to buffer │ +│ 388820360 │ SetBuffer(B→I) │ Buffer copied to IDL atomically │ +│ 388820361 │ Close(B) │ Buffer closed (optional) │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Key insight:** The SetBuffer instruction marks the "version slot". All Write instructions target the **buffer**, not the IDL account directly. + +### Why Use Buffers? + +1. **Atomic updates:** The IDL is either fully old or fully new, never partial +2. **Non-blocking:** The main IDL remains readable during the multi-tx upload +3. 
**Failure recovery:** If upload fails, the original IDL is unchanged + +--- + +## Understanding IDL Versions + +A critical concept for historical extraction: **what constitutes an IDL "version"?** + +### Version-Defining Operations + +Only these operations represent a new IDL version: + +| Operation | When it defines a version | +|-----------|--------------------------| +| `Create` | Always - marks initial IDL creation | +| `SetBuffer` | Always - marks an atomic IDL update | + +### Non-Version Operations + +These do NOT represent new versions on their own: + +| Operation | Why not a version | +|-----------|------------------| +| `Write` | Intermediate step - part of a sequence | +| `CreateBuffer` | Buffer creation, not IDL modification | +| `Resize` | Capacity change, not content change | +| `SetAuthority` | Authority change, not content change | +| `Close` | Deletion, not a new version | + +### The Write Trap + +A common mistake is treating each `Write` transaction as a separate IDL version. This is incorrect because: + +1. Writes are **append operations** that build up data incrementally +2. A single IDL update may span **8-10+ Write transactions** +3. Each intermediate Write produces incomplete/invalid data + +**Example of incorrect version detection:** +``` +Detected "versions" (WRONG): +- 357165216 (Create) +- 357165222 (Write) ← Invalid! Only 1/8 of the data +- 357165228 (Write) ← Invalid! Only 2/8 of the data +- 357165258 (Write) ← Invalid! Only 3/8 of the data +... +``` + +**Correct version detection:** +``` +Actual versions (CORRECT): +- 357165216 (Create) ← First version, data comes from subsequent Writes +``` + +--- + +## Compression + +IDL data is always stored in **zlib-compressed** format. 
+ +### Compression Details + +- Algorithm: zlib (RFC 1950) +- Typical compression ratio: 4-6x for JSON +- zlib header: First byte is typically `0x78` (indicates zlib) + - `0x78 0x9c` - default compression + - `0x78 0x01` - no/low compression (fastest) + - `0x78 0xda` - best compression + +### Decompression in Rust + +```rust +use flate2::read::ZlibDecoder; +use std::io::Read; + +fn decompress_idl(compressed: &[u8]) -> std::io::Result<String> { + let mut decoder = ZlibDecoder::new(compressed); + let mut decompressed = String::new(); + decoder.read_to_string(&mut decompressed)?; + Ok(decompressed) +} +``` + +### Example Sizes + +For a typical program IDL: +- Compressed: ~3,800 bytes +- Decompressed: ~23,500 bytes (JSON) +- Compression ratio: ~6.2x + +--- + +## Historical IDL Extraction + +Extracting historical IDL versions requires understanding the transaction history and correctly handling both Create and SetBuffer patterns. + +### The Challenge + +Helius (and most RPC providers) return **parsed transactions**, not historical account states. You cannot simply fetch "the IDL account at slot X". Instead, you must reconstruct the IDL from the transaction inputs. + +### High-Level Algorithm + +``` +1. Derive IDL account address from program ID +2. Fetch all transactions that touched the IDL account +3. Identify version-defining transactions (Create, SetBuffer only) +4. Sort versions by slot ascending +5. For each version, extract IDL content based on operation type +6. Save each version as {slot}.json +``` + +### Detailed Extraction Algorithm + +``` +FOR each version IN versions (sorted by slot ascending): + + IF version.operation == SetBuffer: + # Buffer-based update + 1. Parse SetBuffer instruction to get buffer address + - Buffer is at accounts[0], IDL is at accounts[1] + 2. Fetch ALL transactions for that buffer address + 3. Collect all Write instructions targeting the buffer + 4. Sort writes by slot ascending (oldest first) + 5. Concatenate all write payloads + 6. 
Decompress the result + + ELSE IF version.operation == Create: + # Direct write to IDL account + 1. Find all Write instructions targeting the IDL account + 2. Filter writes where: + - slot >= version.slot (Writes come AFTER Create) + - slot < next_version.slot (don't include writes for next version) + 3. Sort writes by slot ascending + 4. Concatenate all write payloads + 5. Decompress the result +``` + +### Identifying IDL Instructions + +To properly identify IDL instructions: + +```rust +const IDL_IX_TAG_LE: [u8; 8] = [0x40, 0xf4, 0xbc, 0x78, 0xa7, 0xe9, 0x69, 0x0a]; + +const IDL_VARIANT_CREATE: u8 = 0; +const IDL_VARIANT_CREATE_BUFFER: u8 = 1; +const IDL_VARIANT_WRITE: u8 = 2; +const IDL_VARIANT_SET_BUFFER: u8 = 3; + +fn classify_idl_instruction(data: &[u8]) -> Option<IdlOperation> { + if data.len() < 9 { + return None; + } + if data[..8] != IDL_IX_TAG_LE { + return None; + } + + match data[8] { + IDL_VARIANT_CREATE => Some(IdlOperation::Create), + IDL_VARIANT_WRITE => Some(IdlOperation::Write), + IDL_VARIANT_SET_BUFFER => Some(IdlOperation::SetBuffer), + _ => Some(IdlOperation::Other), + } +} +``` + +### Extracting Buffer Address from SetBuffer + +**Critical:** You must verify the instruction is actually SetBuffer before extracting the buffer: + +```rust +fn extract_buffer_from_set_buffer( + instruction: &Instruction, + program_id: &Pubkey, + idl_address: &Pubkey, +) -> Option<Pubkey> { + // Must be our program + if instruction.program_id != *program_id { + return None; + } + + // MUST verify this is actually SetBuffer (variant 3) + let variant = parse_idl_instruction_variant(&instruction.data)?; + if variant != IDL_VARIANT_SET_BUFFER { + return None; // Could be Create with IDL at index 1! + } + + // SetBuffer accounts: [buffer, idl, authority] + let accounts = &instruction.accounts; + if accounts.get(1) != Some(idl_address) { + return None; + } + + accounts.first().copied() +} +``` + +### Buffer Reuse Warning + +Buffer accounts can be **reused** across multiple IDL updates. 
A single buffer address may contain transactions from different update sequences. + +When reconstructing from a buffer, you must carefully filter Writes: +- Only include Writes that happened **before** the specific SetBuffer +- The buffer may contain data from later updates if reused + +### Write Ordering + +Writes must be concatenated in **chronological order** (oldest slot first): + +```rust +// Sort by slot ascending +writes.sort_by_key(|w| w.slot); + +// Concatenate all payloads +let compressed: Vec<u8> = writes + .iter() + .flat_map(|w| w.data.clone()) + .collect(); + +// Decompress +let idl_json = decompress_idl(&compressed)?; +``` + +--- + +## Common Pitfalls + +### 1. Treating Writes as Versions + +**Wrong:** Detecting every Write as a separate IDL version +**Correct:** Only Create and SetBuffer define versions; Writes are intermediate steps + +### 2. Assuming Offset-Based Writes + +**Wrong:** Treating Write as "write at offset X" +**Correct:** Write always **appends** to the end - there is no offset parameter + +### 3. Not Verifying Instruction Variant + +**Wrong:** Assuming any instruction with IDL at accounts[1] is SetBuffer +**Correct:** Always check the variant byte (data[8]) before extracting buffer + +```rust +// WRONG - matches Create, Write, and other instructions too +if accounts.get(1) == Some(&idl_address) { + let buffer = accounts[0]; // Could be payer, not buffer! +} + +// CORRECT - verify variant first +let variant = data[8]; +if variant == IDL_VARIANT_SET_BUFFER && accounts.get(1) == Some(&idl_address) { + let buffer = accounts[0]; // Actually a buffer +} +``` + +### 4. 
Wrong Slot Filtering for Create + +**Wrong:** For Create at slot X, looking for Writes with slot <= X +**Correct:** For Create at slot X, look for Writes with slot >= X and < next_version_slot + +```rust +// WRONG - misses all the Writes that come AFTER Create +let writes = all_writes.filter(|w| w.slot <= create_slot); + +// CORRECT - Writes come after Create, before next version +let writes = all_writes.filter(|w| + w.slot >= create_slot && + w.slot < next_version_slot.unwrap_or(u64::MAX) +); +``` + +### 5. Wrong Account Order for SetBuffer + +**Wrong:** Assuming `[idl, buffer, authority]` +**Correct:** Order is `[buffer, idl, authority]` + +### 6. Ignoring Inner Instructions + +IDL instructions may appear as inner instructions in CPIs. Always check both top-level and inner instructions: + +```rust +for instruction in &transaction.instructions { + check_instruction(instruction); + + if let Some(inner) = &instruction.inner_instructions { + for inner_instruction in inner { + check_instruction(inner_instruction); + } + } +} +``` + +### 7. Not Handling Buffer Reuse + +**Wrong:** Fetching all buffer transactions and using all Writes +**Correct:** Only use Writes that happened before the specific SetBuffer + +### 8. Forgetting the `Vec<u8>` Length Prefix + +Write instruction payload is Borsh-encoded as `Vec<u8>`: +- Bytes 9-12: Length prefix (u32 LE) +- Bytes 13+: Actual data + +```rust +// WRONG +let data = &instruction_data[9..]; + +// CORRECT +let len = u32::from_le_bytes(instruction_data[9..13].try_into()?) 
as usize; +let data = &instruction_data[13..13 + len]; +``` + +--- + +## Real-World Example + +### Program: pump.fun (6EF8rrecthR5Dkzon8Nwu78hRvfCKubJ14M5uBEwF6P) + +**IDL Account:** AYgC53tU5BbP2NAnv5nConJxAdpQZctvmZK88pu69xRs + +**Version History (13 versions):** + +``` +Slot │ Operation │ Size │ Notes +───────────┼────────────┼─────────┼───────────────────────── +357165216 │ Create │ 87 KB │ Initial IDL (8 Write txs follow) +357804590 │ SetBuffer │ 92 KB │ First update +357804868 │ SetBuffer │ 92 KB │ +358446304 │ SetBuffer │ 93 KB │ +... │ ... │ ... │ +392395487 │ SetBuffer │ 144 KB │ Most recent +``` + +**Initial Creation Sequence (Create at 357165216):** + +``` +Slot │ Operation │ Data Size │ Cumulative +───────────┼───────────┼───────────┼─────────── +357165216 │ Create │ 0 │ 0 (empty account) +357165222 │ Write │ 600 │ 600 +357165228 │ Write │ 600 │ 1200 +357165258 │ Write │ 600 │ 1800 +357165264 │ Write │ 600 │ 2400 +357165272 │ Write │ 600 │ 3000 +357165281 │ Write │ 600 │ 3600 +357165309 │ Write │ 600 │ 4200 +357165316 │ Write │ 339 │ 4539 (complete) +``` + +After decompression: 87,431 bytes of valid JSON IDL. 
+ +--- + +## References + +- [Anchor IDL source code](https://github.com/coral-xyz/anchor/blob/master/lang/syn/src/idl/build.rs) +- [IDL instruction dispatch (historical)](https://github.com/coral-xyz/anchor/blob/63b7177e/lang/attribute/program/src/lib.rs) +- [Solana create_with_seed](https://docs.rs/solana-sdk/latest/solana_sdk/pubkey/struct.Pubkey.html#method.create_with_seed) +- [flate2 crate for zlib](https://docs.rs/flate2/latest/flate2/) diff --git a/src/commands/history.rs b/src/commands/history.rs index afcb72f..d845179 100644 --- a/src/commands/history.rs +++ b/src/commands/history.rs @@ -1,18 +1,16 @@ -use std::path::Path; -use std::sync::Arc; +use std::{path::Path, sync::Arc}; use eyre::eyre; use log::info; use solana_loader_v3_interface::state::UpgradeableLoaderState::Program; use solana_pubkey::Pubkey; -use tokio::sync::Semaphore; -use tokio::task::JoinSet; +use tokio::{sync::Semaphore, task::JoinSet}; use wincode::Deserialize; -use crate::core::helius::Helius; -use crate::core::reconstruct::reconstruct_from_transactions; -use crate::core::types::UpgradeableLoaderState; -use crate::core::upgrades::collect_program_change_entries; +use crate::core::{ + helius::Helius, reconstruct::reconstruct_from_transactions, types::UpgradeableLoaderState, + upgrades::collect_program_change_entries, +}; pub async fn reconstruct_older_versions( program: &Pubkey, @@ -70,7 +68,8 @@ pub async fn reconstruct_older_versions( } info!("Reconstructing slot {} from buffer {}", entry.slot, entry.buffer); - // prgm len >= max write buffer offset+data so we have to track total len for checksums to match + // prgm len >= max write buffer offset+data so we have to track total len for checksums + // to match let extensions_after_this_slot: usize = extensions_by_slot .iter() .filter(|ext| ext.slot > entry.slot) diff --git a/src/commands/idl_history.rs b/src/commands/idl_history.rs new file mode 100644 index 0000000..8060519 --- /dev/null +++ b/src/commands/idl_history.rs @@ -0,0 +1,128 
@@ +//! IDL History command implementation +//! +//! Fetches and stores historical IDL versions for Solana programs using +//! Anchor-style IDL format. + +use std::path::Path; +use std::sync::Arc; + +use log::{info, warn}; +use solana_pubkey::Pubkey; + +use crate::core::{ + helius::Helius, + idl::{collect_anchor_idl_versions, derive_anchor_idl_address, extract_anchor_idl_from_slot}, +}; + +/// Fetches and stores historical IDL versions for a program using Anchor format. +pub async fn fetch_idl_history( + program: &Pubkey, + min_slot: u64, + output_dir: &Path, + max_rps: u32, +) -> eyre::Result<()> { + dotenvy::dotenv().ok(); + + let helius = Arc::new(Helius::new(max_rps)?); + + let program_dir = output_dir.join(program.to_string()); + + process_anchor_idl(&helius, program, min_slot, &program_dir).await +} + +/// Processes Anchor-style IDL history. +async fn process_anchor_idl( + helius: &Arc, + program: &Pubkey, + min_slot: u64, + program_dir: &Path, +) -> eyre::Result<()> { + let idl_address = derive_anchor_idl_address(program); + info!("Checking Anchor IDL at {}", idl_address); + + // Check if the IDL account has any transactions + let test_txs = helius.fetch_txs_for_account(&idl_address, &None).await?; + if test_txs.is_empty() { + info!("No Anchor IDL found for program {}", program); + return Ok(()); + } + + info!("Found Anchor IDL account, collecting versions..."); + + // Collect all versions + let mut versions = + collect_anchor_idl_versions(helius, program, &idl_address, min_slot).await?; + + if versions.is_empty() { + info!("No Anchor IDL versions found at or after slot {}", min_slot); + return Ok(()); + } + + // Sort versions by slot ascending to determine next_version_slot for each + versions.sort_by_key(|v| v.slot); + + info!("Found {} Anchor IDL version(s)", versions.len()); + + // Fetch all transactions for the IDL account + let all_idl_txs = helius.keep_fetching_txs_for_account(&idl_address).await?; + + // Create output directory + let anchor_dir = 
program_dir.join("anchor"); + tokio::fs::create_dir_all(&anchor_dir).await?; + + // Find the most recent slot + let most_recent_slot = versions.iter().map(|v| v.slot).max().unwrap_or(0); + + // Process versions sequentially for clean logs + for (i, version) in versions.iter().enumerate() { + let next_version_slot = versions.get(i + 1).map(|v| v.slot); + let is_most_recent = version.slot == most_recent_slot; + + let out_path = anchor_dir.join(format!("{}.json", version.slot)); + if tokio::fs::metadata(&out_path).await.is_ok() { + info!("Skipping slot {} (already stored)", version.slot); + continue; + } + + info!( + "Extracting IDL [{}/{}] slot {}", + i + 1, + versions.len(), + version.slot + ); + + match extract_anchor_idl_from_slot( + helius, + program, + &idl_address, + version, + &all_idl_txs, + is_most_recent, + next_version_slot, + ) + .await + { + Ok(Some(idl)) => { + // Validate it's valid JSON and format it + match serde_json::from_str::(&idl) { + Ok(json) => { + let formatted = serde_json::to_string_pretty(&json)?; + tokio::fs::write(&out_path, &formatted).await?; + info!("Wrote {}", out_path.display()); + } + Err(e) => { + warn!("Invalid JSON for slot {}: {}", version.slot, e); + } + } + } + Ok(None) => { + warn!("Could not extract IDL for slot {}", version.slot); + } + Err(e) => { + warn!("Error extracting IDL for slot {}: {}", version.slot, e); + } + } + } + + Ok(()) +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index c16c935..b7e26e0 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,3 +1,5 @@ mod history; +mod idl_history; pub use history::reconstruct_older_versions; +pub use idl_history::fetch_idl_history; diff --git a/src/core/helius.rs b/src/core/helius.rs index 9649bcb..cbff98a 100644 --- a/src/core/helius.rs +++ b/src/core/helius.rs @@ -1,17 +1,22 @@ use crate::core::types::HeliusTx; use eyre::eyre; use itertools::Itertools; +use log::debug; use reqwest::Client; use serde::Serialize; use solana_account::Account; use 
solana_commitment_config::CommitmentConfig; use solana_pubkey::Pubkey; -use solana_rpc_client::api::response::transaction::Signature; -use solana_rpc_client::nonblocking::rpc_client::RpcClient; +use solana_rpc_client::{ + api::response::transaction::Signature, nonblocking::rpc_client::RpcClient, +}; use std::{env, num::NonZeroU32, sync::Arc}; -use governor::state::direct::NotKeyed; -use governor::{Quota, RateLimiter, clock::DefaultClock, state::InMemoryState}; +use governor::{ + Quota, RateLimiter, + clock::DefaultClock, + state::{InMemoryState, direct::NotKeyed}, +}; const HELIUS_RPC_BASE: &str = "https://mainnet.helius-rpc.com"; const HELIUS_API_BASE: &str = "https://api-mainnet.helius-rpc.com/v0"; @@ -31,7 +36,9 @@ impl Helius { let rpc_url = format!("{HELIUS_RPC_BASE}/?api-key={api_key}"); let rpc = RpcClient::new_with_commitment(rpc_url, CommitmentConfig::finalized()); - let http = Client::new(); + let http = Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build()?; let q = Quota::per_second(NonZeroU32::new(max_rps).ok_or_else(|| eyre!("invalid max_rps"))?); @@ -58,6 +65,7 @@ impl Helius { ) -> eyre::Result> { let mut all: Vec = Vec::new(); let mut before = None; + let mut page = 0; loop { let mut batch = fetch_helius_transactions_page( @@ -71,10 +79,19 @@ impl Helius { if batch.is_empty() { break; } + page += 1; + debug!( + "Fetched page {} for {} ({} txs, total {})", + page, + address, + batch.len(), + all.len() + batch.len() + ); before = batch.last().and_then(|item| item.signature); all.append(&mut batch); } + debug!("Finished fetching {} for {} ({} total txs)", page, address, all.len()); Ok(all.into_iter().unique_by(|tx| tx.signature).collect()) } diff --git a/src/core/idl/address.rs b/src/core/idl/address.rs new file mode 100644 index 0000000..58f2cbd --- /dev/null +++ b/src/core/idl/address.rs @@ -0,0 +1,13 @@ +//! IDL account address derivation. + +use solana_pubkey::Pubkey; + +/// Derives the Anchor IDL address for a given program. 
+/// +/// The derivation follows Anchor's convention: +/// 1. Find PDA with empty seeds using program as base +/// 2. Create address with seed "anchor:idl" using base and program +pub fn derive_anchor_idl_address(program_id: &Pubkey) -> Pubkey { + let (base, _) = Pubkey::find_program_address(&[], program_id); + Pubkey::create_with_seed(&base, "anchor:idl", program_id).expect("create_with_seed failed") +} diff --git a/src/core/idl/buffer.rs b/src/core/idl/buffer.rs new file mode 100644 index 0000000..38ae3d9 --- /dev/null +++ b/src/core/idl/buffer.rs @@ -0,0 +1,62 @@ +//! Buffer extraction from SetBuffer operations. + +use solana_pubkey::Pubkey; + +use super::constants::IDL_VARIANT_SET_BUFFER; +use super::instruction::parse_idl_instruction_variant; +use crate::core::types::{HeliusInstruction, HeliusTx}; + +/// Extracts the buffer account from an `__idl_set_buffer` instruction. +/// +/// For set_buffer, the account layout is: [buffer, idl_account, authority] +fn extract_buffer_from_set_buffer( + instr: &HeliusInstruction, + program_id: &Pubkey, + idl_address: &Pubkey, +) -> Option { + if instr.program_id != *program_id { + return None; + } + + // Verify this is actually a SetBuffer instruction + let variant = parse_idl_instruction_variant(instr)?; + if variant != IDL_VARIANT_SET_BUFFER { + return None; + } + + let accounts = instr.accounts.as_ref()?; + + // Check if IDL account is second account (index 1) + if accounts.get(1) != Some(idl_address) { + return None; + } + + // Buffer is first account (index 0) + accounts.first().copied() +} + +/// Attempts to find the buffer used in an IDL-modifying transaction. 
+pub(super) fn find_idl_buffer_in_transaction( + tx: &HeliusTx, + program_id: &Pubkey, + idl_address: &Pubkey, +) -> Option { + let instrs = tx.instructions.as_ref()?; + + for instr in instrs { + if let Some(buffer) = extract_buffer_from_set_buffer(instr, program_id, idl_address) { + return Some(buffer); + } + if let Some(inner) = &instr.inner_instructions { + for inner_instr in inner { + if let Some(buffer) = + extract_buffer_from_set_buffer(inner_instr, program_id, idl_address) + { + return Some(buffer); + } + } + } + } + + None +} diff --git a/src/core/idl/constants.rs b/src/core/idl/constants.rs new file mode 100644 index 0000000..8f66656 --- /dev/null +++ b/src/core/idl/constants.rs @@ -0,0 +1,24 @@ +//! Constants for Anchor IDL instruction parsing. + +/// Anchor IDL account header size: +/// - 8 bytes: discriminator +/// - 32 bytes: authority pubkey +/// - 4 bytes: data_len (u32 LE) +pub const ANCHOR_IDL_HEADER_SIZE: usize = 44; + +/// IDL instruction tag: sha256("anchor:idl")[..8] in little-endian. +/// This is the prefix for all Anchor IDL instructions. +pub const IDL_IX_TAG_LE: [u8; 8] = [0x40, 0xf4, 0xbc, 0x78, 0xa7, 0xe9, 0x69, 0x0a]; + +/// IDL instruction variants. +pub const IDL_VARIANT_CREATE: u8 = 0; +#[allow(dead_code)] +pub const IDL_VARIANT_CREATE_BUFFER: u8 = 1; +pub const IDL_VARIANT_WRITE: u8 = 2; +pub const IDL_VARIANT_SET_BUFFER: u8 = 3; +#[allow(dead_code)] +pub const IDL_VARIANT_SET_AUTHORITY: u8 = 4; +#[allow(dead_code)] +pub const IDL_VARIANT_CLOSE: u8 = 5; +#[allow(dead_code)] +pub const IDL_VARIANT_RESIZE: u8 = 6; diff --git a/src/core/idl/decompression.rs b/src/core/idl/decompression.rs new file mode 100644 index 0000000..fa21f98 --- /dev/null +++ b/src/core/idl/decompression.rs @@ -0,0 +1,62 @@ +//! IDL decompression utilities. + +use std::io::Read; + +use eyre::eyre; +use flate2::read::ZlibDecoder; + +use super::constants::ANCHOR_IDL_HEADER_SIZE; + +/// Extracts and decompresses an Anchor IDL from raw account data. 
+/// +/// Anchor IDL account layout: +/// - 8 bytes: discriminator +/// - 32 bytes: authority pubkey +/// - 4 bytes: data_len (u32 LE) +/// - N bytes: zlib-compressed IDL JSON +pub fn decompress_anchor_idl(account_data: &[u8]) -> eyre::Result { + if account_data.len() < ANCHOR_IDL_HEADER_SIZE { + return Err(eyre!( + "Account data too small: {} bytes (need at least {})", + account_data.len(), + ANCHOR_IDL_HEADER_SIZE + )); + } + + // Read data length + let data_len_bytes: [u8; 4] = account_data[40..44].try_into()?; + let data_len = u32::from_le_bytes(data_len_bytes) as usize; + + if account_data.len() < ANCHOR_IDL_HEADER_SIZE + data_len { + return Err(eyre!( + "Account data incomplete: {} bytes (expected {} for data)", + account_data.len(), + ANCHOR_IDL_HEADER_SIZE + data_len + )); + } + + let compressed = &account_data[ANCHOR_IDL_HEADER_SIZE..ANCHOR_IDL_HEADER_SIZE + data_len]; + + // Decompress using zlib + let mut decoder = ZlibDecoder::new(compressed); + let mut decompressed = String::new(); + decoder.read_to_string(&mut decompressed)?; + + Ok(decompressed) +} + +/// Decompresses raw zlib-compressed IDL data (without account header). +/// +/// This is used when reconstructing IDL from Write instruction data, +/// which contains the raw compressed bytes without the account header. +pub fn decompress_raw_idl(compressed: &[u8]) -> eyre::Result { + if compressed.is_empty() { + return Err(eyre!("Compressed data is empty")); + } + + let mut decoder = ZlibDecoder::new(compressed); + let mut decompressed = String::new(); + decoder.read_to_string(&mut decompressed)?; + + Ok(decompressed) +} diff --git a/src/core/idl/events.rs b/src/core/idl/events.rs new file mode 100644 index 0000000..7456385 --- /dev/null +++ b/src/core/idl/events.rs @@ -0,0 +1,155 @@ +//! IDL event collection and version tracking. 
+ +use log::info; +use solana_pubkey::Pubkey; +use solana_rpc_client::api::response::transaction::Signature; + +use super::instruction::classify_idl_instruction; +use super::types::{IdlEvent, IdlModificationType}; +use crate::core::helius::Helius; +use crate::core::types::IdlVersionEntry; + +/// Collects all IDL events for an Anchor program's IDL account. +/// +/// Returns all events (Create, Write, SetBuffer, Other) found on the account, +/// sorted by slot descending (most recent first). +pub(super) async fn collect_anchor_idl_events( + helius: &Helius, + program_id: &Pubkey, + idl_address: &Pubkey, + min_slot: u64, +) -> eyre::Result> { + let mut events = Vec::new(); + let mut before: Option = None; + + loop { + let batch = helius.fetch_txs_for_account(idl_address, &before).await?; + if batch.is_empty() { + break; + } + + for tx in &batch { + let Some(slot) = tx.slot else { + continue; + }; + if slot < min_slot { + // Log summary before returning + log_events_summary(&events); + return Ok(events); + } + let Some(instrs) = &tx.instructions else { + continue; + }; + + // Determine the type of IDL modification + let mut mod_type: Option = None; + + for instr in instrs { + if let Some(t) = classify_idl_instruction(instr, program_id, idl_address) { + // SetBuffer takes precedence + if t == IdlModificationType::SetBuffer { + mod_type = Some(t); + break; + } + if mod_type.is_none() || t == IdlModificationType::Create { + mod_type = Some(t); + } + } + // Check inner instructions + if let Some(inner) = &instr.inner_instructions { + for inner_instr in inner { + if let Some(t) = + classify_idl_instruction(inner_instr, program_id, idl_address) + { + if t == IdlModificationType::SetBuffer { + mod_type = Some(t); + break; + } + if mod_type.is_none() || t == IdlModificationType::Create { + mod_type = Some(t); + } + } + } + } + } + + if let Some(t) = mod_type { + if let Some(sig) = tx.signature { + events.push(IdlEvent { slot, signature: sig, modification_type: t }); + } + } + 
} + + before = batch.last().and_then(|tx| tx.signature); + } + + log_events_summary(&events); + Ok(events) +} + +/// Logs a summary of collected IDL events. +fn log_events_summary(events: &[IdlEvent]) { + if events.is_empty() { + return; + } + + let creates = events.iter().filter(|e| e.modification_type == IdlModificationType::Create).count(); + let writes = events.iter().filter(|e| e.modification_type == IdlModificationType::Write).count(); + let set_buffers = events.iter().filter(|e| e.modification_type == IdlModificationType::SetBuffer).count(); + let others = events.iter().filter(|e| e.modification_type == IdlModificationType::Other).count(); + + info!( + "IDL account history: {} events total ({} Create, {} Write, {} SetBuffer, {} Other)", + events.len(), creates, writes, set_buffers, others + ); + + // Log timeline of version-boundary events (Create and SetBuffer) + let mut version_events: Vec<_> = events + .iter() + .filter(|e| { + e.modification_type == IdlModificationType::Create + || e.modification_type == IdlModificationType::SetBuffer + }) + .collect(); + version_events.sort_by_key(|e| e.slot); + + if !version_events.is_empty() { + info!("IDL version boundaries (chronological):"); + for (i, event) in version_events.iter().enumerate() { + info!( + " [{}] slot {} - {}", + i + 1, + event.slot, + event.modification_type + ); + } + } +} + +/// Collects all IDL version entries for an Anchor program. +/// +/// Returns a list of slots where the IDL was modified, along with the transaction +/// signature that made the modification. +/// +/// Only collects SetBuffer and Create operations as "versions" since Write operations +/// are typically part of a multi-transaction sequence. 
+pub async fn collect_anchor_idl_versions( + helius: &Helius, + program_id: &Pubkey, + idl_address: &Pubkey, + min_slot: u64, +) -> eyre::Result> { + let events = collect_anchor_idl_events(helius, program_id, idl_address, min_slot).await?; + + // Filter to version boundaries (Create and SetBuffer) + let versions: Vec = events + .into_iter() + .filter(|e| { + e.modification_type == IdlModificationType::Create + || e.modification_type == IdlModificationType::SetBuffer + }) + .map(|e| IdlVersionEntry { slot: e.slot, signature: e.signature }) + .collect(); + + Ok(versions) +} diff --git a/src/core/idl/extraction.rs b/src/core/idl/extraction.rs new file mode 100644 index 0000000..e5ad284 --- /dev/null +++ b/src/core/idl/extraction.rs @@ -0,0 +1,174 @@ +//! Main IDL extraction orchestration. + +use log::{debug, info, warn}; +use solana_pubkey::Pubkey; + +use super::buffer::find_idl_buffer_in_transaction; +use super::decompression::{decompress_anchor_idl, decompress_raw_idl}; +use super::writes::{collect_idl_writes_from_transactions, reconstruct_idl_from_writes}; +use crate::core::helius::Helius; +use crate::core::types::{HeliusTx, IdlVersionEntry}; + +/// Tries to extract IDL content from a transaction. +/// +/// This handles different scenarios: +/// 1. Direct writes to IDL account +/// 2. Set buffer operation (need to fetch buffer transactions) +/// 3. 
Fetching current IDL state (for most recent version) +pub async fn extract_anchor_idl_from_slot( + helius: &Helius, + program_id: &Pubkey, + idl_address: &Pubkey, + version: &IdlVersionEntry, + all_idl_txs: &[HeliusTx], + is_most_recent: bool, + next_version_slot: Option, +) -> eyre::Result> { + // Find the transaction for this version + let tx = all_idl_txs.iter().find(|tx| tx.signature == Some(version.signature)); + + let Some(tx) = tx else { + warn!("Transaction not found for signature {}", version.signature); + return Ok(None); + }; + + // Check if this is a set_buffer operation + if let Some(buffer) = find_idl_buffer_in_transaction(tx, program_id, idl_address) { + info!( + "Slot {} is SetBuffer: fetching writes from buffer {}", + version.slot, buffer + ); + + // Fetch all transactions for the buffer + let buffer_txs = helius.keep_fetching_txs_for_account(&buffer).await?; + + // Collect writes to the buffer (sorted by slot, oldest first) + let writes = collect_idl_writes_from_transactions(&buffer_txs, program_id, &buffer); + + if !writes.is_empty() { + let first_slot = writes.first().map(|w| w.slot).unwrap_or(0); + let last_slot = writes.last().map(|w| w.slot).unwrap_or(0); + info!( + " Buffer reconstruction: {} Write txs spanning slots {}..{} ({} bytes total)", + writes.len(), + first_slot, + last_slot, + writes.iter().map(|w| w.data.len()).sum::() + ); + + // Reconstruct by concatenating all writes in chronological order + let compressed_data = reconstruct_idl_from_writes(&writes)?; + + // Verify it starts with zlib header (0x78 0x9c or similar) + if compressed_data.len() >= 2 && compressed_data[0] == 0x78 { + match decompress_raw_idl(&compressed_data) { + Ok(idl) => { + info!( + " Decompressed: {} bytes -> {} bytes JSON", + compressed_data.len(), + idl.len() + ); + return Ok(Some(idl)); + } + Err(e) => { + warn!("Decompression failed: {}", e); + } + } + } else { + debug!( + "Compressed data doesn't start with zlib header: {:02x?}", + 
&compressed_data[..compressed_data.len().min(4)] + ); + } + } + } + + // Try to find direct writes to the IDL account + // For Create operations, writes happen AFTER the create slot, so we filter: + // - slot >= version.slot (include this version's slot and after) + // - slot < next_version_slot (don't include writes that belong to next version) + let range_desc = match next_version_slot { + Some(next) => format!("slots {}..{}", version.slot, next), + None => format!("slots {}..HEAD", version.slot), + }; + info!( + "Slot {} is Create/direct: collecting writes in {}", + version.slot, range_desc + ); + + let txs_in_range: Vec<_> = all_idl_txs + .iter() + .filter(|t| { + t.slot.is_some_and(|s| { + s >= version.slot && next_version_slot.is_none_or(|next| s < next) + }) + }) + .collect(); + + let writes = collect_idl_writes_from_transactions( + &txs_in_range.iter().cloned().cloned().collect::>(), + program_id, + idl_address, + ); + + if !writes.is_empty() { + let first_slot = writes.first().map(|w| w.slot).unwrap_or(0); + let last_slot = writes.last().map(|w| w.slot).unwrap_or(0); + info!( + " Direct reconstruction: {} Write txs spanning slots {}..{} ({} bytes total)", + writes.len(), + first_slot, + last_slot, + writes.iter().map(|w| w.data.len()).sum::() + ); + + let compressed_data = reconstruct_idl_from_writes(&writes)?; + + // Verify it starts with zlib header + if compressed_data.len() >= 2 && compressed_data[0] == 0x78 { + match decompress_raw_idl(&compressed_data) { + Ok(idl) => { + info!( + " Decompressed: {} bytes -> {} bytes JSON", + compressed_data.len(), + idl.len() + ); + return Ok(Some(idl)); + } + Err(e) => { + warn!("Decompression of direct writes failed: {}", e); + } + } + } + } else { + info!(" No Write transactions found in range"); + } + + // If this is the most recent version, try fetching the current IDL account state + if is_most_recent { + info!(" Falling back to current account state (most recent version)"); + match 
helius.fetch_account(idl_address).await { + Ok(account) => { + info!(" Fetched IDL account: {} bytes", account.data.len()); + match decompress_anchor_idl(&account.data) { + Ok(idl) => { + info!( + " Decompressed from account: {} bytes JSON", + idl.len() + ); + return Ok(Some(idl)); + } + Err(e) => { + warn!("Failed to decompress IDL from account: {}", e); + } + } + } + Err(e) => { + debug!("Failed to fetch IDL account: {}", e); + } + } + } + + warn!("Could not extract IDL data for slot {}", version.slot); + Ok(None) +} diff --git a/src/core/idl/instruction.rs b/src/core/idl/instruction.rs new file mode 100644 index 0000000..25ee5d5 --- /dev/null +++ b/src/core/idl/instruction.rs @@ -0,0 +1,48 @@ +//! IDL instruction parsing and classification. + +use solana_pubkey::Pubkey; + +use super::constants::{ + IDL_IX_TAG_LE, IDL_VARIANT_CREATE, IDL_VARIANT_SET_BUFFER, IDL_VARIANT_WRITE, +}; +use super::types::IdlModificationType; +use crate::core::types::HeliusInstruction; + +/// Classifies an IDL instruction by its type. +pub(super) fn classify_idl_instruction( + instr: &HeliusInstruction, + program_id: &Pubkey, + idl_address: &Pubkey, +) -> Option { + if instr.program_id != *program_id { + return None; + } + + // Check if IDL account is in the instruction's accounts + let accounts = instr.accounts.as_ref()?; + if !accounts.contains(idl_address) { + return None; + } + + // Parse the instruction variant + let variant = parse_idl_instruction_variant(instr)?; + + Some(match variant { + IDL_VARIANT_CREATE => IdlModificationType::Create, + IDL_VARIANT_WRITE => IdlModificationType::Write, + IDL_VARIANT_SET_BUFFER => IdlModificationType::SetBuffer, + _ => IdlModificationType::Other, + }) +} + +/// Parses an IDL instruction and returns the variant if valid. 
+pub(super) fn parse_idl_instruction_variant(instr: &HeliusInstruction) -> Option { + let raw = bs58::decode(&instr.data).into_vec().ok()?; + if raw.len() < 9 { + return None; + } + if raw[..8] != IDL_IX_TAG_LE { + return None; + } + Some(raw[8]) +} diff --git a/src/core/idl/mod.rs b/src/core/idl/mod.rs new file mode 100644 index 0000000..d0079cd --- /dev/null +++ b/src/core/idl/mod.rs @@ -0,0 +1,19 @@ +//! Anchor IDL extraction and history collection. +//! +//! This module handles extracting historical IDL versions from Anchor programs. +//! Anchor stores IDLs in a PDA derived from the program ID. + +mod address; +mod buffer; +mod constants; +mod decompression; +mod events; +mod extraction; +mod instruction; +mod types; +mod writes; + +// Public re-exports +pub use address::derive_anchor_idl_address; +pub use events::collect_anchor_idl_versions; +pub use extraction::extract_anchor_idl_from_slot; diff --git a/src/core/idl/types.rs b/src/core/idl/types.rs new file mode 100644 index 0000000..04ce4e5 --- /dev/null +++ b/src/core/idl/types.rs @@ -0,0 +1,42 @@ +//! Type definitions for IDL operations. + +use solana_rpc_client::api::response::transaction::Signature; + +/// Identifies the type of IDL modification in a transaction. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IdlModificationType { + /// SetBuffer: atomically replaces IDL content from a buffer + SetBuffer, + /// Write: appends data (may be part of a sequence) + Write, + /// Create: creates the IDL account + Create, + /// Other IDL instruction (SetAuthority, Close, Resize) + Other, +} + +impl std::fmt::Display for IdlModificationType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + IdlModificationType::SetBuffer => write!(f, "SetBuffer"), + IdlModificationType::Write => write!(f, "Write"), + IdlModificationType::Create => write!(f, "Create"), + IdlModificationType::Other => write!(f, "Other"), + } + } +} + +/// An IDL event detected on the IDL account. 
+#[derive(Debug, Clone)] +pub struct IdlEvent { + pub slot: u64, + pub signature: Signature, + pub modification_type: IdlModificationType, +} + +/// Represents a Write instruction with its slot for ordering. +#[derive(Debug, Clone)] +pub(crate) struct IdlWriteEntry { + pub slot: u64, + pub data: Vec, +} diff --git a/src/core/idl/writes.rs b/src/core/idl/writes.rs new file mode 100644 index 0000000..6b2c3e0 --- /dev/null +++ b/src/core/idl/writes.rs @@ -0,0 +1,119 @@ +//! Write instruction handling and IDL reconstruction. + +use eyre::eyre; +use solana_pubkey::Pubkey; + +use super::constants::{IDL_IX_TAG_LE, IDL_VARIANT_WRITE}; +use super::types::IdlWriteEntry; +use crate::core::types::{HeliusInstruction, HeliusTx}; + +/// Collects IDL Write data from transactions for a buffer/IDL account. +/// +/// Anchor's __idl_write is an APPEND operation - each Write adds data to the end. +/// We need to collect writes in chronological order (oldest first) and concatenate. +/// +/// Instruction format: +/// - 8 bytes: IDL_IX_TAG (0x40f4bc78a7e9690a in LE) +/// - 1 byte: variant (2 for Write) +/// - 4 bytes: Vec length (u32 LE) +/// - N bytes: data +pub(super) fn collect_idl_writes_from_transactions( + txs: &[HeliusTx], + program_id: &Pubkey, + target_account: &Pubkey, +) -> Vec { + let mut writes = Vec::new(); + + for tx in txs { + let Some(slot) = tx.slot else { + continue; + }; + let Some(instrs) = &tx.instructions else { + continue; + }; + + for instr in instrs { + if let Some(data) = extract_idl_write_data(instr, program_id, target_account) { + writes.push(IdlWriteEntry { slot, data }); + } + // Also check inner instructions + if let Some(inner) = &instr.inner_instructions { + for inner_instr in inner { + if let Some(data) = + extract_idl_write_data(inner_instr, program_id, target_account) + { + writes.push(IdlWriteEntry { slot, data }); + } + } + } + } + } + + // Sort by slot ascending (oldest first) - writes are appended in order + writes.sort_by_key(|w| w.slot); + writes 
+} + +/// Extracts data from an IDL Write instruction. +/// +/// Returns the data bytes if this is a valid Write instruction targeting the account. +fn extract_idl_write_data( + instr: &HeliusInstruction, + program_id: &Pubkey, + target_account: &Pubkey, +) -> Option> { + if instr.program_id != *program_id { + return None; + } + + // Write instruction has 2 accounts: [target, authority] + let accounts = instr.accounts.as_ref()?; + if accounts.first() != Some(target_account) { + return None; + } + + let raw = bs58::decode(&instr.data).into_vec().ok()?; + + // Minimum size: 8 (IDL_IX_TAG) + 1 (variant) + 4 (vec len) + 1 (at least one byte) + if raw.len() < 14 { + return None; + } + + // Check IDL_IX_TAG + if raw[..8] != IDL_IX_TAG_LE { + return None; + } + + // Check variant is Write (2) + if raw[8] != IDL_VARIANT_WRITE { + return None; + } + + // Parse Vec length + let data_len_bytes: [u8; 4] = raw[9..13].try_into().ok()?; + let data_len = u32::from_le_bytes(data_len_bytes) as usize; + + // Verify we have enough data + if raw.len() < 13 + data_len { + return None; + } + + Some(raw[13..13 + data_len].to_vec()) +} + +/// Reconstructs compressed IDL data from Write entries. +/// +/// Writes are APPENDED in chronological order, so we simply concatenate. 
+pub(super) fn reconstruct_idl_from_writes(writes: &[IdlWriteEntry]) -> eyre::Result> { + if writes.is_empty() { + return Err(eyre!("No write chunks to reconstruct")); + } + + // Concatenate all write data in order (already sorted by slot) + let mut buffer = Vec::new(); + for entry in writes { + buffer.extend_from_slice(&entry.data); + } + + Ok(buffer) +} diff --git a/src/core/mod.rs b/src/core/mod.rs index 08baf29..1d7f595 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,4 +1,5 @@ pub mod helius; +pub mod idl; pub mod reconstruct; pub mod types; pub mod upgrades; diff --git a/src/core/types.rs b/src/core/types.rs index 7fb48c2..23eb525 100644 --- a/src/core/types.rs +++ b/src/core/types.rs @@ -2,13 +2,19 @@ use serde::Deserialize; use serde_with::{DisplayFromStr, serde_as}; use solana_pubkey::{Pubkey, pubkey}; use solana_rpc_client::api::response::transaction::Signature; -use wincode::SchemaRead; -use wincode::containers::Pod; +use wincode::{SchemaRead, containers::Pod}; pub const BPF_UPGRADEABLE_LOADER: Pubkey = pubkey!("BPFLoaderUpgradeab1e11111111111111111111111"); +/// Represents a version of an IDL at a particular slot +#[derive(Debug, Clone)] +pub struct IdlVersionEntry { + pub slot: u64, + pub signature: Signature, +} + #[serde_as] -#[derive(Deserialize)] +#[derive(Clone, Deserialize)] pub struct HeliusTx { #[serde_as(as = "Option")] pub signature: Option, diff --git a/src/core/upgrades.rs b/src/core/upgrades.rs index bbf8bb2..560026a 100644 --- a/src/core/upgrades.rs +++ b/src/core/upgrades.rs @@ -1,7 +1,9 @@ -use crate::core::helius::Helius; -use crate::core::types::{ - BPF_UPGRADEABLE_LOADER, ExtendEntry, HeliusInstruction, ProgramChangeEntries, UpgradeEntry, - UpgradeableLoaderInstruction, +use crate::core::{ + helius::Helius, + types::{ + BPF_UPGRADEABLE_LOADER, ExtendEntry, HeliusInstruction, ProgramChangeEntries, UpgradeEntry, + UpgradeableLoaderInstruction, + }, }; use eyre::eyre; use solana_pubkey::Pubkey; diff --git a/src/main.rs 
b/src/main.rs index 011fabc..352bcce 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use clap::{Parser, Subcommand}; -use commands::reconstruct_older_versions; +use commands::{fetch_idl_history, reconstruct_older_versions}; use env_logger::Env; use solana_pubkey::Pubkey; @@ -39,6 +39,25 @@ enum Command { #[arg(long, default_value_t = 10)] max_rps: u32, }, + + /// Fetch and store historical IDL versions for slots >= start. + IdlHistory { + /// Program to fetch IDL history for + #[clap(short, long)] + program: Pubkey, + + /// Minimum slot (inclusive) to include. + #[clap(short, long)] + slot: u64, + + /// Base output directory for historical IDLs. + #[arg(long, default_value = "historical-idl")] + output_dir: PathBuf, + + /// Global max requests per second (shared across all tasks) + #[arg(long, default_value_t = 10)] + max_rps: u32, + }, } #[tokio::main] @@ -49,6 +68,9 @@ async fn main() -> eyre::Result<()> { Command::Resurrect { program, slot, output_dir, concurrency, max_rps } => { reconstruct_older_versions(&program, slot, &output_dir, concurrency, max_rps).await? } + Command::IdlHistory { program, slot, output_dir, max_rps } => { + fetch_idl_history(&program, slot, &output_dir, max_rps).await? 
+ } } Ok(()) From fbe2f1f4698ce5aec9ca613b22aa0ccdd23794aa Mon Sep 17 00:00:00 2001 From: Sergei Patrikeev Date: Mon, 19 Jan 2026 20:13:05 +0000 Subject: [PATCH 2/2] refactor: extract TransactionProvider trait and generalize transaction types - Add TransactionProvider trait for abstracting transaction fetching - Rename HeliusTx to ParsedTransaction and HeliusInstruction to ParsedInstruction - Make transaction_index optional (REST API doesn't provide it) - Add HeliusTransaction/HeliusInstruction for Helius-specific response parsing - Update slot-based tracking to position-based (SlotPosition) throughout - Extract SlotPosition to its own module --- src/commands/history.rs | 21 +++--- src/commands/idl_history.rs | 39 +++++------- src/core/helius.rs | 123 +++++++++++++++++++----------------- src/core/idl/buffer.rs | 6 +- src/core/idl/events.rs | 99 +++++++++++++---------------- src/core/idl/extraction.rs | 56 ++++++++-------- src/core/idl/instruction.rs | 6 +- src/core/idl/types.rs | 8 ++- src/core/idl/writes.rs | 18 +++--- src/core/mod.rs | 3 + src/core/reconstruct.rs | 4 +- src/core/slot_position.rs | 25 ++++++++ src/core/types.rs | 100 +++++++++++++++++++++++++++-- src/core/upgrades.rs | 62 ++++++++---------- 14 files changed, 330 insertions(+), 240 deletions(-) create mode 100644 src/core/slot_position.rs diff --git a/src/commands/history.rs b/src/commands/history.rs index d845179..c0634b4 100644 --- a/src/commands/history.rs +++ b/src/commands/history.rs @@ -45,9 +45,9 @@ pub async fn reconstruct_older_versions( return Ok(()); } - let mut extensions_by_slot: Vec<_> = changes.extensions.clone(); - extensions_by_slot.sort_by(|a, b| b.slot.cmp(&a.slot)); - let extensions_by_slot = Arc::new(extensions_by_slot); + let mut extensions_by_position: Vec<_> = changes.extensions.clone(); + extensions_by_position.sort_by(|a, b| b.position.cmp(&a.position)); + let extensions_by_position = Arc::new(extensions_by_position); let semaphore = 
Arc::new(Semaphore::new(concurrency.max(1))); let mut join_set: JoinSet> = JoinSet::new(); @@ -55,27 +55,28 @@ pub async fn reconstruct_older_versions( for entry in changes.upgrades { let helius = helius.clone(); let program_dir = program_dir.clone(); - let extensions_by_slot = extensions_by_slot.clone(); + let extensions_by_position = extensions_by_position.clone(); let sem = semaphore.clone(); join_set.spawn(async move { let _permit = sem.acquire_owned().await?; - let out_path = program_dir.join(format!("{}.so", entry.slot)); + // Use slot for filename (position format "slot:tx_index" would be awkward) + let out_path = program_dir.join(format!("{}.so", entry.position.slot())); if tokio::fs::metadata(&out_path).await.is_ok() { - info!("Skipping slot {} (already stored)", entry.slot); + info!("Skipping position {} (already stored)", entry.position); return Ok(()); } - info!("Reconstructing slot {} from buffer {}", entry.slot, entry.buffer); + info!("Reconstructing position {} from buffer {}", entry.position, entry.buffer); // prgm len >= max write buffer offset+data so we have to track total len for checksums // to match - let extensions_after_this_slot: usize = extensions_by_slot + let extensions_after_this: usize = extensions_by_position .iter() - .filter(|ext| ext.slot > entry.slot) + .filter(|ext| ext.position > entry.position) .map(|ext| ext.additional_bytes as usize) .sum(); - let total_len = current_program_len - extensions_after_this_slot; + let total_len = current_program_len - extensions_after_this; let buffer_txs = helius.keep_fetching_txs_for_account(&entry.buffer).await?; let reconstructed = reconstruct_from_transactions(&entry.buffer, &buffer_txs, total_len)?; diff --git a/src/commands/idl_history.rs b/src/commands/idl_history.rs index 8060519..8e402bd 100644 --- a/src/commands/idl_history.rs +++ b/src/commands/idl_history.rs @@ -39,15 +39,7 @@ async fn process_anchor_idl( ) -> eyre::Result<()> { let idl_address = derive_anchor_idl_address(program); 
info!("Checking Anchor IDL at {}", idl_address); - - // Check if the IDL account has any transactions - let test_txs = helius.fetch_txs_for_account(&idl_address, &None).await?; - if test_txs.is_empty() { - info!("No Anchor IDL found for program {}", program); - return Ok(()); - } - - info!("Found Anchor IDL account, collecting versions..."); + info!("Collecting Anchor IDL versions..."); // Collect all versions let mut versions = @@ -58,8 +50,8 @@ async fn process_anchor_idl( return Ok(()); } - // Sort versions by slot ascending to determine next_version_slot for each - versions.sort_by_key(|v| v.slot); + // Sort versions by position ascending to determine next_version_position for each + versions.sort_by_key(|v| v.position); info!("Found {} Anchor IDL version(s)", versions.len()); @@ -70,25 +62,26 @@ async fn process_anchor_idl( let anchor_dir = program_dir.join("anchor"); tokio::fs::create_dir_all(&anchor_dir).await?; - // Find the most recent slot - let most_recent_slot = versions.iter().map(|v| v.slot).max().unwrap_or(0); + // Find the most recent position + let most_recent_position = versions.iter().map(|v| v.position).max(); // Process versions sequentially for clean logs for (i, version) in versions.iter().enumerate() { - let next_version_slot = versions.get(i + 1).map(|v| v.slot); - let is_most_recent = version.slot == most_recent_slot; + let next_version_position = versions.get(i + 1).map(|v| v.position); + let is_most_recent = Some(version.position) == most_recent_position; - let out_path = anchor_dir.join(format!("{}.json", version.slot)); + // Use slot for filename (position format "slot:tx_index" would be awkward) + let out_path = anchor_dir.join(format!("{}.json", version.position.slot())); if tokio::fs::metadata(&out_path).await.is_ok() { - info!("Skipping slot {} (already stored)", version.slot); + info!("Skipping position {} (already stored)", version.position); continue; } info!( - "Extracting IDL [{}/{}] slot {}", + "Extracting IDL [{}/{}] position 
{}", i + 1, versions.len(), - version.slot + version.position ); match extract_anchor_idl_from_slot( @@ -98,7 +91,7 @@ async fn process_anchor_idl( version, &all_idl_txs, is_most_recent, - next_version_slot, + next_version_position, ) .await { @@ -111,15 +104,15 @@ async fn process_anchor_idl( info!("Wrote {}", out_path.display()); } Err(e) => { - warn!("Invalid JSON for slot {}: {}", version.slot, e); + warn!("Invalid JSON for position {}: {}", version.position, e); } } } Ok(None) => { - warn!("Could not extract IDL for slot {}", version.slot); + warn!("Could not extract IDL for position {}", version.position); } Err(e) => { - warn!("Error extracting IDL for slot {}: {}", version.slot, e); + warn!("Error extracting IDL for position {}: {}", version.position, e); } } } diff --git a/src/core/helius.rs b/src/core/helius.rs index cbff98a..eb865d9 100644 --- a/src/core/helius.rs +++ b/src/core/helius.rs @@ -1,4 +1,6 @@ -use crate::core::types::HeliusTx; +//! Helius API client for fetching transactions and accounts. + +use crate::core::types::{HeliusTransaction, ParsedTransaction, TransactionProvider}; use eyre::eyre; use itertools::Itertools; use log::debug; @@ -7,9 +9,7 @@ use serde::Serialize; use solana_account::Account; use solana_commitment_config::CommitmentConfig; use solana_pubkey::Pubkey; -use solana_rpc_client::{ - api::response::transaction::Signature, nonblocking::rpc_client::RpcClient, -}; +use solana_rpc_client::nonblocking::rpc_client::RpcClient; use std::{env, num::NonZeroU32, sync::Arc}; use governor::{ @@ -59,26 +59,24 @@ impl Helius { Ok(self.rpc.get_account(pubkey).await?) } + /// Fetches all transactions for an account, handling pagination internally. 
pub async fn keep_fetching_txs_for_account( &self, address: &Pubkey, - ) -> eyre::Result> { - let mut all: Vec = Vec::new(); - let mut before = None; + ) -> eyre::Result> { + let mut all: Vec = Vec::new(); + let mut before_signature: Option = None; let mut page = 0; loop { - let mut batch = fetch_helius_transactions_page( - &self.http, - &self.key, - self.limiter.as_deref(), - address, - &before, - ) - .await?; + let batch = self + .fetch_transactions_page(address, before_signature.as_deref()) + .await?; + if batch.is_empty() { break; } + page += 1; debug!( "Fetched page {} for {} ({} txs, total {})", @@ -87,32 +85,68 @@ impl Helius { batch.len(), all.len() + batch.len() ); - before = batch.last().and_then(|item| item.signature); - all.append(&mut batch); + + // Get the last signature for pagination + before_signature = batch.last().and_then(|tx| tx.signature.map(|s| s.to_string())); + + all.extend(batch); } - debug!("Finished fetching {} for {} ({} total txs)", page, address, all.len()); + debug!("Finished fetching {} pages for {} ({} total txs)", page, address, all.len()); Ok(all.into_iter().unique_by(|tx| tx.signature).collect()) } - pub async fn fetch_txs_for_account( + /// Fetches a single page of transactions using the REST API. 
+ async fn fetch_transactions_page( &self, address: &Pubkey, - before: &Option, - ) -> eyre::Result> { - fetch_helius_transactions_page( - &self.http, - &self.key, - self.limiter.as_deref(), - address, - before, - ) - .await + before_signature: Option<&str>, + ) -> eyre::Result> { + self.throttle().await; + + let url = format!("{HELIUS_API_BASE}/addresses/{address}/transactions"); + let params = RestParams { + sort_order: "desc".to_string(), + api_key: self.key.to_string(), + limit: 100, + before_signature: before_signature.map(|s| s.to_string()), + }; + + let resp = self.http.get(&url).query(&params).send().await?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + return Err(eyre!("Helius API error {}: {}", status, body)); + } + + let body = resp.text().await?; + debug!("Helius REST response length: {} bytes", body.len()); + + let helius_txs: Vec = serde_json::from_str(&body).map_err(|e| { + debug!("Failed to parse response: {}", &body[..body.len().min(500)]); + eyre!("Failed to parse Helius response: {}", e) + })?; + + // Convert to generic ParsedTransaction format + let txs = helius_txs + .into_iter() + .filter_map(|tx| tx.into_parsed()) + .collect(); + + Ok(txs) + } +} + +/// Implement the TransactionProvider trait for Helius +impl TransactionProvider for Helius { + async fn fetch_transactions(&self, address: &Pubkey) -> eyre::Result> { + self.keep_fetching_txs_for_account(address).await } } #[derive(Debug, Serialize)] -struct Params { +struct RestParams { sort_order: String, #[serde(rename = "api-key")] api_key: String, @@ -120,32 +154,3 @@ struct Params { #[serde(rename = "before-signature", skip_serializing_if = "Option::is_none")] before_signature: Option, } - -async fn fetch_helius_transactions_page( - http: &Client, - api_key: &str, - limiter: Option<&DirectLimiter>, - address: &Pubkey, - before_signature: &Option, -) -> eyre::Result> { - if let Some(l) = limiter { - l.until_ready().await; - } - 
- let url = format!("{HELIUS_API_BASE}/addresses/{address}/transactions"); - let params = Params { - sort_order: "desc".to_string(), - api_key: api_key.to_string(), - limit: 100, - before_signature: before_signature.as_ref().map(|s| s.to_string()), - }; - - let resp = http.get(&url).query(&params).send().await?; - - let status = resp.status(); - if !status.is_success() { - return Err(eyre!("Helius API error {}", status)); - } - - Ok(resp.json::>().await?) -} diff --git a/src/core/idl/buffer.rs b/src/core/idl/buffer.rs index 38ae3d9..05c73fb 100644 --- a/src/core/idl/buffer.rs +++ b/src/core/idl/buffer.rs @@ -4,13 +4,13 @@ use solana_pubkey::Pubkey; use super::constants::IDL_VARIANT_SET_BUFFER; use super::instruction::parse_idl_instruction_variant; -use crate::core::types::{HeliusInstruction, HeliusTx}; +use crate::core::types::{ParsedInstruction, ParsedTransaction}; /// Extracts the buffer account from an `__idl_set_buffer` instruction. /// /// For set_buffer, the account layout is: [buffer, idl_account, authority] fn extract_buffer_from_set_buffer( - instr: &HeliusInstruction, + instr: &ParsedInstruction, program_id: &Pubkey, idl_address: &Pubkey, ) -> Option { @@ -37,7 +37,7 @@ fn extract_buffer_from_set_buffer( /// Attempts to find the buffer used in an IDL-modifying transaction. 
pub(super) fn find_idl_buffer_in_transaction( - tx: &HeliusTx, + tx: &ParsedTransaction, program_id: &Pubkey, idl_address: &Pubkey, ) -> Option { diff --git a/src/core/idl/events.rs b/src/core/idl/events.rs index 7456385..21aceb0 100644 --- a/src/core/idl/events.rs +++ b/src/core/idl/events.rs @@ -2,7 +2,6 @@ use log::info; use solana_pubkey::Pubkey; -use solana_rpc_client::api::response::transaction::Signature; use super::instruction::classify_idl_instruction; use super::types::{IdlEvent, IdlModificationType}; @@ -20,67 +19,57 @@ pub(super) async fn collect_anchor_idl_events( min_slot: u64, ) -> eyre::Result> { let mut events = Vec::new(); - let mut before: Option = None; - loop { - let batch = helius.fetch_txs_for_account(idl_address, &before).await?; - if batch.is_empty() { - break; + // Fetch all transactions for the IDL account + let all_txs = helius.keep_fetching_txs_for_account(idl_address).await?; + + for tx in &all_txs { + let position = tx.position(); + if position.slot() < min_slot { + continue; } - for tx in &batch { - let Some(slot) = tx.slot else { - continue; - }; - if slot < min_slot { - // Log summary before returning - log_events_summary(&events); - return Ok(events); - } - let Some(instrs) = &tx.instructions else { - continue; - }; - - // Determine the type of IDL modification - let mut mod_type: Option = None; - - for instr in instrs { - if let Some(t) = classify_idl_instruction(instr, program_id, idl_address) { - // SetBuffer takes precedence - if t == IdlModificationType::SetBuffer { - mod_type = Some(t); - break; - } - if mod_type.is_none() || t == IdlModificationType::Create { - mod_type = Some(t); - } + let Some(instrs) = &tx.instructions else { + continue; + }; + + // Determine the type of IDL modification + let mut mod_type: Option = None; + + for instr in instrs { + if let Some(t) = classify_idl_instruction(instr, program_id, idl_address) { + // SetBuffer takes precedence + if t == IdlModificationType::SetBuffer { + mod_type = Some(t); + 
break; } - // Check inner instructions - if let Some(inner) = &instr.inner_instructions { - for inner_instr in inner { - if let Some(t) = - classify_idl_instruction(inner_instr, program_id, idl_address) - { - if t == IdlModificationType::SetBuffer { - mod_type = Some(t); - break; - } - if mod_type.is_none() || t == IdlModificationType::Create { - mod_type = Some(t); - } + if mod_type.is_none() || t == IdlModificationType::Create { + mod_type = Some(t); + } + } + // Check inner instructions + if let Some(inner) = &instr.inner_instructions { + for inner_instr in inner { + if let Some(t) = + classify_idl_instruction(inner_instr, program_id, idl_address) + { + if t == IdlModificationType::SetBuffer { + mod_type = Some(t); + break; + } + if mod_type.is_none() || t == IdlModificationType::Create { + mod_type = Some(t); } } } } + } - if let Some(t) = mod_type { - if let Some(sig) = tx.signature { - events.push(IdlEvent { slot, signature: sig, modification_type: t }); - } + if let Some(t) = mod_type { + if let Some(sig) = tx.signature { + events.push(IdlEvent { position, signature: sig, modification_type: t }); } } - - before = batch.last().and_then(|tx| tx.signature); } log_events_summary(&events); @@ -111,15 +100,15 @@ fn log_events_summary(events: &[IdlEvent]) { || e.modification_type == IdlModificationType::SetBuffer }) .collect(); - version_events.sort_by_key(|e| e.slot); + version_events.sort_by_key(|e| e.position); if !version_events.is_empty() { info!("IDL version boundaries (chronological):"); for (i, event) in version_events.iter().enumerate() { info!( - " [{}] slot {} - {}", + " [{}] {} - {}", i + 1, - event.slot, + event.position, event.modification_type ); } @@ -148,7 +137,7 @@ pub async fn collect_anchor_idl_versions( e.modification_type == IdlModificationType::Create || e.modification_type == IdlModificationType::SetBuffer }) - .map(|e| IdlVersionEntry { slot: e.slot, signature: e.signature }) + .map(|e| IdlVersionEntry { position: e.position, signature: 
e.signature }) .collect(); Ok(versions) diff --git a/src/core/idl/extraction.rs b/src/core/idl/extraction.rs index e5ad284..4213af0 100644 --- a/src/core/idl/extraction.rs +++ b/src/core/idl/extraction.rs @@ -7,7 +7,8 @@ use super::buffer::find_idl_buffer_in_transaction; use super::decompression::{decompress_anchor_idl, decompress_raw_idl}; use super::writes::{collect_idl_writes_from_transactions, reconstruct_idl_from_writes}; use crate::core::helius::Helius; -use crate::core::types::{HeliusTx, IdlVersionEntry}; +use crate::core::types::{IdlVersionEntry, ParsedTransaction}; +use crate::core::SlotPosition; /// Tries to extract IDL content from a transaction. /// @@ -20,9 +21,9 @@ pub async fn extract_anchor_idl_from_slot( program_id: &Pubkey, idl_address: &Pubkey, version: &IdlVersionEntry, - all_idl_txs: &[HeliusTx], + all_idl_txs: &[ParsedTransaction], is_most_recent: bool, - next_version_slot: Option, + next_version_position: Option, ) -> eyre::Result> { // Find the transaction for this version let tx = all_idl_txs.iter().find(|tx| tx.signature == Some(version.signature)); @@ -35,24 +36,24 @@ pub async fn extract_anchor_idl_from_slot( // Check if this is a set_buffer operation if let Some(buffer) = find_idl_buffer_in_transaction(tx, program_id, idl_address) { info!( - "Slot {} is SetBuffer: fetching writes from buffer {}", - version.slot, buffer + "Position {} is SetBuffer: fetching writes from buffer {}", + version.position, buffer ); // Fetch all transactions for the buffer let buffer_txs = helius.keep_fetching_txs_for_account(&buffer).await?; - // Collect writes to the buffer (sorted by slot, oldest first) + // Collect writes to the buffer (sorted by position, oldest first) let writes = collect_idl_writes_from_transactions(&buffer_txs, program_id, &buffer); if !writes.is_empty() { - let first_slot = writes.first().map(|w| w.slot).unwrap_or(0); - let last_slot = writes.last().map(|w| w.slot).unwrap_or(0); + let first_pos = writes.first().map(|w| w.position); + 
let last_pos = writes.last().map(|w| w.position); info!( - " Buffer reconstruction: {} Write txs spanning slots {}..{} ({} bytes total)", + " Buffer reconstruction: {} Write txs spanning {:?}..{:?} ({} bytes total)", writes.len(), - first_slot, - last_slot, + first_pos, + last_pos, writes.iter().map(|w| w.data.len()).sum::() ); @@ -84,24 +85,23 @@ pub async fn extract_anchor_idl_from_slot( } // Try to find direct writes to the IDL account - // For Create operations, writes happen AFTER the create slot, so we filter: - // - slot >= version.slot (include this version's slot and after) - // - slot < next_version_slot (don't include writes that belong to next version) - let range_desc = match next_version_slot { - Some(next) => format!("slots {}..{}", version.slot, next), - None => format!("slots {}..HEAD", version.slot), + // For Create operations, writes happen AFTER the create position, so we filter: + // - position >= version.position (include this version's position and after) + // - position < next_version_position (don't include writes that belong to next version) + let range_desc = match next_version_position { + Some(next) => format!("{}..{}", version.position, next), + None => format!("{}..HEAD", version.position), }; info!( - "Slot {} is Create/direct: collecting writes in {}", - version.slot, range_desc + "Position {} is Create/direct: collecting writes in {}", + version.position, range_desc ); let txs_in_range: Vec<_> = all_idl_txs .iter() .filter(|t| { - t.slot.is_some_and(|s| { - s >= version.slot && next_version_slot.is_none_or(|next| s < next) - }) + let pos = t.position(); + pos >= version.position && next_version_position.is_none_or(|next| pos < next) }) .collect(); @@ -112,13 +112,13 @@ pub async fn extract_anchor_idl_from_slot( ); if !writes.is_empty() { - let first_slot = writes.first().map(|w| w.slot).unwrap_or(0); - let last_slot = writes.last().map(|w| w.slot).unwrap_or(0); + let first_pos = writes.first().map(|w| w.position); + let last_pos = 
writes.last().map(|w| w.position); info!( - " Direct reconstruction: {} Write txs spanning slots {}..{} ({} bytes total)", + " Direct reconstruction: {} Write txs spanning {:?}..{:?} ({} bytes total)", writes.len(), - first_slot, - last_slot, + first_pos, + last_pos, writes.iter().map(|w| w.data.len()).sum::() ); @@ -169,6 +169,6 @@ pub async fn extract_anchor_idl_from_slot( } } - warn!("Could not extract IDL data for slot {}", version.slot); + warn!("Could not extract IDL data for position {}", version.position); Ok(None) } diff --git a/src/core/idl/instruction.rs b/src/core/idl/instruction.rs index 25ee5d5..bb2d3f7 100644 --- a/src/core/idl/instruction.rs +++ b/src/core/idl/instruction.rs @@ -6,11 +6,11 @@ use super::constants::{ IDL_IX_TAG_LE, IDL_VARIANT_CREATE, IDL_VARIANT_SET_BUFFER, IDL_VARIANT_WRITE, }; use super::types::IdlModificationType; -use crate::core::types::HeliusInstruction; +use crate::core::types::ParsedInstruction; /// Classifies an IDL instruction by its type. pub(super) fn classify_idl_instruction( - instr: &HeliusInstruction, + instr: &ParsedInstruction, program_id: &Pubkey, idl_address: &Pubkey, ) -> Option { @@ -36,7 +36,7 @@ pub(super) fn classify_idl_instruction( } /// Parses an IDL instruction and returns the variant if valid. -pub(super) fn parse_idl_instruction_variant(instr: &HeliusInstruction) -> Option { +pub(super) fn parse_idl_instruction_variant(instr: &ParsedInstruction) -> Option { let raw = bs58::decode(&instr.data).into_vec().ok()?; if raw.len() < 9 { return None; diff --git a/src/core/idl/types.rs b/src/core/idl/types.rs index 04ce4e5..4e5a0fb 100644 --- a/src/core/idl/types.rs +++ b/src/core/idl/types.rs @@ -2,6 +2,8 @@ use solana_rpc_client::api::response::transaction::Signature; +use crate::core::SlotPosition; + /// Identifies the type of IDL modification in a transaction. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum IdlModificationType { @@ -29,14 +31,14 @@ impl std::fmt::Display for IdlModificationType { /// An IDL event detected on the IDL account. #[derive(Debug, Clone)] pub struct IdlEvent { - pub slot: u64, + pub position: SlotPosition, pub signature: Signature, pub modification_type: IdlModificationType, } -/// Represents a Write instruction with its slot for ordering. +/// Represents a Write instruction with its position for ordering. #[derive(Debug, Clone)] pub(crate) struct IdlWriteEntry { - pub slot: u64, + pub position: SlotPosition, pub data: Vec, } diff --git a/src/core/idl/writes.rs b/src/core/idl/writes.rs index 6b2c3e0..d510e00 100644 --- a/src/core/idl/writes.rs +++ b/src/core/idl/writes.rs @@ -5,7 +5,7 @@ use solana_pubkey::Pubkey; use super::constants::{IDL_IX_TAG_LE, IDL_VARIANT_WRITE}; use super::types::IdlWriteEntry; -use crate::core::types::{HeliusInstruction, HeliusTx}; +use crate::core::types::{ParsedInstruction, ParsedTransaction}; /// Collects IDL Write data from transactions for a buffer/IDL account. 
/// @@ -18,23 +18,21 @@ use crate::core::types::{HeliusInstruction, HeliusTx}; /// - 4 bytes: Vec length (u32 LE) /// - N bytes: data pub(super) fn collect_idl_writes_from_transactions( - txs: &[HeliusTx], + txs: &[ParsedTransaction], program_id: &Pubkey, target_account: &Pubkey, ) -> Vec { let mut writes = Vec::new(); for tx in txs { - let Some(slot) = tx.slot else { - continue; - }; + let position = tx.position(); let Some(instrs) = &tx.instructions else { continue; }; for instr in instrs { if let Some(data) = extract_idl_write_data(instr, program_id, target_account) { - writes.push(IdlWriteEntry { slot, data }); + writes.push(IdlWriteEntry { position, data }); } // Also check inner instructions if let Some(inner) = &instr.inner_instructions { @@ -42,15 +40,15 @@ pub(super) fn collect_idl_writes_from_transactions( if let Some(data) = extract_idl_write_data(inner_instr, program_id, target_account) { - writes.push(IdlWriteEntry { slot, data }); + writes.push(IdlWriteEntry { position, data }); } } } } } - // Sort by slot ascending (oldest first) - writes are appended in order - writes.sort_by_key(|w| w.slot); + // Sort by position ascending (oldest first) - writes are appended in order + writes.sort_by_key(|w| w.position); writes } @@ -58,7 +56,7 @@ pub(super) fn collect_idl_writes_from_transactions( /// /// Returns the data bytes if this is a valid Write instruction targeting the account. 
fn extract_idl_write_data( - instr: &HeliusInstruction, + instr: &ParsedInstruction, program_id: &Pubkey, target_account: &Pubkey, ) -> Option> { diff --git a/src/core/mod.rs b/src/core/mod.rs index 1d7f595..27b6d6a 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,5 +1,8 @@ pub mod helius; pub mod idl; pub mod reconstruct; +pub mod slot_position; pub mod types; pub mod upgrades; + +pub use slot_position::SlotPosition; diff --git a/src/core/reconstruct.rs b/src/core/reconstruct.rs index b6c93c6..59f1202 100644 --- a/src/core/reconstruct.rs +++ b/src/core/reconstruct.rs @@ -1,12 +1,12 @@ use crate::core::types::{ - BPF_UPGRADEABLE_LOADER, HeliusTx, UpgradeableLoaderInstruction, WriteChunk, + BPF_UPGRADEABLE_LOADER, ParsedTransaction, UpgradeableLoaderInstruction, WriteChunk, }; use eyre::eyre; use solana_pubkey::Pubkey; pub fn reconstruct_from_transactions( target: &Pubkey, - items: &[HeliusTx], + items: &[ParsedTransaction], total_len: usize, ) -> eyre::Result> { let mut chunks = Vec::new(); diff --git a/src/core/slot_position.rs b/src/core/slot_position.rs new file mode 100644 index 0000000..979c230 --- /dev/null +++ b/src/core/slot_position.rs @@ -0,0 +1,25 @@ +/// Represents a chronological position in the Solana ledger. +/// +/// Provides definitive ordering of transactions: first by slot, then by +/// transaction index within the slot. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct SlotPosition { + pub slot: u64, + pub tx_index: u32, +} + +impl SlotPosition { + pub fn new(slot: u64, tx_index: u32) -> Self { + Self { slot, tx_index } + } + + pub fn slot(&self) -> u64 { + self.slot + } +} + +impl std::fmt::Display for SlotPosition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.slot, self.tx_index) + } +} diff --git a/src/core/types.rs b/src/core/types.rs index 23eb525..6722563 100644 --- a/src/core/types.rs +++ b/src/core/types.rs @@ -4,26 +4,94 @@ use solana_pubkey::{Pubkey, pubkey}; use solana_rpc_client::api::response::transaction::Signature; use wincode::{SchemaRead, containers::Pod}; +use super::SlotPosition; + pub const BPF_UPGRADEABLE_LOADER: Pubkey = pubkey!("BPFLoaderUpgradeab1e11111111111111111111111"); -/// Represents a version of an IDL at a particular slot +// ============================================================================ +// Generic Transaction Types (provider-agnostic) +// ============================================================================ + +/// Represents a version of an IDL at a particular position #[derive(Debug, Clone)] pub struct IdlVersionEntry { - pub slot: u64, + pub position: SlotPosition, pub signature: Signature, } +/// A parsed transaction with resolved account keys. +/// This is the common format used throughout the application. +#[derive(Clone, Debug)] +pub struct ParsedTransaction { + pub signature: Option, + pub slot: u64, + /// Transaction index within the slot. None if not available from the provider. + pub transaction_index: Option, + pub instructions: Option>, +} + +impl ParsedTransaction { + /// Returns the SlotPosition for this transaction. + /// Uses transaction_index 0 if not available. + pub fn position(&self) -> SlotPosition { + SlotPosition::new(self.slot, self.transaction_index.unwrap_or(0)) + } +} + +/// A parsed instruction with resolved pubkeys. 
+#[derive(Clone, Debug)] +pub struct ParsedInstruction { + pub program_id: Pubkey, + /// Base58-encoded instruction data + pub data: String, + pub accounts: Option>, + pub inner_instructions: Option>, +} + +// ============================================================================ +// Transaction Provider Trait +// ============================================================================ + +/// Trait for providers that can fetch transactions for accounts. +#[allow(dead_code, async_fn_in_trait)] +pub trait TransactionProvider { + /// Fetches all transactions for an account, handling pagination internally. + async fn fetch_transactions(&self, address: &Pubkey) -> eyre::Result>; +} + +// ============================================================================ +// Helius REST API Response Types +// ============================================================================ + +/// Raw transaction from Helius Enhanced Transactions API. +/// This maps directly to the JSON response. #[serde_as] -#[derive(Clone, Deserialize)] -pub struct HeliusTx { +#[derive(Clone, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct HeliusTransaction { #[serde_as(as = "Option")] pub signature: Option, pub slot: Option, pub instructions: Option>, } +impl HeliusTransaction { + /// Converts to the generic ParsedTransaction format. + pub fn into_parsed(self) -> Option { + Some(ParsedTransaction { + signature: self.signature, + slot: self.slot?, + transaction_index: None, // REST API doesn't provide this + instructions: self.instructions.map(|ixs| { + ixs.into_iter().map(HeliusInstruction::into_parsed).collect() + }), + }) + } +} + +/// Raw instruction from Helius Enhanced Transactions API. 
#[serde_as] -#[derive(Clone, Deserialize)] +#[derive(Clone, Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct HeliusInstruction { #[serde_as(as = "DisplayFromStr")] @@ -34,6 +102,24 @@ pub struct HeliusInstruction { pub inner_instructions: Option>, } +impl HeliusInstruction { + /// Converts to the generic ParsedInstruction format. + pub fn into_parsed(self) -> ParsedInstruction { + ParsedInstruction { + program_id: self.program_id, + data: self.data, + accounts: self.accounts, + inner_instructions: self.inner_instructions.map(|ixs| { + ixs.into_iter().map(HeliusInstruction::into_parsed).collect() + }), + } + } +} + +// ============================================================================ +// Program Upgrade Types +// ============================================================================ + #[derive(Debug)] pub struct WriteChunk { pub offset: usize, @@ -42,13 +128,13 @@ pub struct WriteChunk { #[derive(Debug, Clone)] pub struct UpgradeEntry { - pub slot: u64, + pub position: SlotPosition, pub buffer: Pubkey, } #[derive(Debug, Clone)] pub struct ExtendEntry { - pub slot: u64, + pub position: SlotPosition, pub additional_bytes: u32, } diff --git a/src/core/upgrades.rs b/src/core/upgrades.rs index 560026a..d409ef6 100644 --- a/src/core/upgrades.rs +++ b/src/core/upgrades.rs @@ -1,13 +1,13 @@ use crate::core::{ helius::Helius, types::{ - BPF_UPGRADEABLE_LOADER, ExtendEntry, HeliusInstruction, ProgramChangeEntries, UpgradeEntry, + BPF_UPGRADEABLE_LOADER, ExtendEntry, ParsedInstruction, ProgramChangeEntries, UpgradeEntry, UpgradeableLoaderInstruction, }, + SlotPosition, }; use eyre::eyre; use solana_pubkey::Pubkey; -use solana_rpc_client::api::response::transaction::Signature; use std::collections::BTreeMap; use wincode::Deserialize; @@ -16,35 +16,23 @@ pub async fn collect_program_change_entries( program_data_pubkey: &Pubkey, min_slot: u64, ) -> eyre::Result { - let mut before: Option = None; - let mut upgrades: BTreeMap = BTreeMap::new(); - 
let mut extensions: BTreeMap = BTreeMap::new(); + let mut upgrades: BTreeMap = BTreeMap::new(); + let mut extensions: BTreeMap = BTreeMap::new(); - loop { - let batch = helius.fetch_txs_for_account(program_data_pubkey, &before).await?; - if batch.is_empty() { - break; - } + // Fetch all transactions for the program data account + let all_txs = helius.keep_fetching_txs_for_account(program_data_pubkey).await?; - for tx in &batch { - let Some(slot) = tx.slot else { - continue; - }; - if slot < min_slot { - return Ok(ProgramChangeEntries { - upgrades: upgrades.into_values().collect(), - extensions: extensions.into_values().collect(), - }); - } - let Some(instrs) = &tx.instructions else { - continue; - }; - for instr in instrs { - process_instruction(instr, slot, &mut upgrades, &mut extensions)?; - } + for tx in &all_txs { + let position = tx.position(); + if position.slot() < min_slot { + continue; + } + let Some(instrs) = &tx.instructions else { + continue; + }; + for instr in instrs { + process_instruction(instr, position, &mut upgrades, &mut extensions)?; } - - before = batch.last().and_then(|tx| tx.signature); } Ok(ProgramChangeEntries { @@ -54,10 +42,10 @@ pub async fn collect_program_change_entries( } fn process_instruction( - instr: &HeliusInstruction, - slot: u64, - upgrades: &mut BTreeMap, - extensions: &mut BTreeMap, + instr: &ParsedInstruction, + position: SlotPosition, + upgrades: &mut BTreeMap, + extensions: &mut BTreeMap, ) -> eyre::Result<()> { if instr.program_id == BPF_UPGRADEABLE_LOADER { let raw = bs58::decode(&instr.data).into_vec()?; @@ -66,18 +54,18 @@ fn process_instruction( UpgradeableLoaderInstruction::Upgrade => { let accounts = instr.accounts.clone().unwrap_or_default(); let buffer = accounts.get(2).ok_or(eyre!("missing buffer"))?; - upgrades.entry(slot).or_insert(UpgradeEntry { slot, buffer: *buffer }); + upgrades.entry(position).or_insert(UpgradeEntry { position, buffer: *buffer }); } UpgradeableLoaderInstruction::DeployWithMaxDataLen { .. 
} => { let accounts = instr.accounts.clone().unwrap_or_default(); let buffer = accounts.get(3).ok_or(eyre!("missing buffer"))?; - upgrades.entry(slot).or_insert(UpgradeEntry { slot, buffer: *buffer }); + upgrades.entry(position).or_insert(UpgradeEntry { position, buffer: *buffer }); } UpgradeableLoaderInstruction::ExtendProgram { additional_bytes } => { - extensions.entry(slot).or_insert(ExtendEntry { slot, additional_bytes }); + extensions.entry(position).or_insert(ExtendEntry { position, additional_bytes }); } UpgradeableLoaderInstruction::ExtendProgramChecked { additional_bytes } => { - extensions.entry(slot).or_insert(ExtendEntry { slot, additional_bytes }); + extensions.entry(position).or_insert(ExtendEntry { position, additional_bytes }); } _ => {} } @@ -86,7 +74,7 @@ fn process_instruction( if let Some(inner) = &instr.inner_instructions { for inner_instr in inner { - process_instruction(inner_instr, slot, upgrades, extensions)?; + process_instruction(inner_instr, position, upgrades, extensions)?; } }