Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.env
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
test_gemini.py
.venv/
venv/
env/
*.log
55 changes: 3 additions & 52 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,54 +1,5 @@
# NVARC solution to ARC-AGI-2 2025
# Trelis-NVARC

This repository contains the code and instructions to replicate the NVARC submissions to the [Arc Prize 2025 competition on Kaggle](https://www.kaggle.com/competitions/arc-prize-2025).
ARC-AGI-2 trace generation and NVARC integration using Gemini 3 Flash.

The NVARC team includes Ivan Sorokin and Jean-Francois Puget, who are also members of the NVIDIA [KGMoN](https://www.nvidia.com/en-us/ai-data-science/kaggle-grandmasters) team.

The solution is described in the [paper](nvarc_2025.pdf) and consists of three main components:

- Multi-stage synthetic data generation pipeline;
- Improved version of the ARChitects solution that won the [ARC Prize competition in 2024](https://www.kaggle.com/competitions/arc-prize-2024);
- Improved version of Tiny Recursive Models by Alexia Jolicoeur-Martineau.

## Synthetic Data Generation

The scripts and prompts for Synthetic Data Generation pipeline can be found in [SDG](SDG) folder.

[NVARC Artifacts Puzzles](https://www.kaggle.com/datasets/sorokin/nvarc-artifacts-puzzles) dataset includes generated text used to construct the synthetic puzzles.

```bash
kaggle datasets download -d sorokin/nvarc-artifacts-puzzles
unzip nvarc-artifacts-puzzles.zip
```

[NVARC Synthetic Puzzles](https://www.kaggle.com/datasets/sorokin/nvarc-synthetic-puzzles) dataset includes our 103k synthetic puzzles.

```bash
kaggle datasets download -d sorokin/nvarc-synthetic-puzzles
unzip nvarc-synthetic-puzzles.zip
```

[NVARC Augmented Puzzles](https://www.kaggle.com/datasets/sorokin/nvarc-augmented-puzzles) dataset includes few subsets with 3.2M augmented puzzles.

```bash
kaggle datasets download -d sorokin/nvarc-augmented-puzzles
unzip nvarc-augmented-puzzles.zip
```

Visualization of synthetic puzzles is shown in the Kaggle notebook [nvarc-viewer](https://www.kaggle.com/code/sorokin/nvarc-viewer).

## The ARChitects

The hyperparameters and fine-tuning scripts for the Qwen3 4B model are located in the [ARChitects](ARChitects) folder.

The submission notebook is available on Kaggle [sorokin/arc2-qwen3-unsloth-flash-lora-batch4-queue](https://www.kaggle.com/code/sorokin/arc2-qwen3-unsloth-flash-lora-batch4-queue).

## Tiny Recursive Models

The scripts and instructions to train Tiny Recursive Models are in the [TRM](TRM) folder.

The submission notebook is available on Kaggle [cpmpml/arc2-trm-v31](https://www.kaggle.com/code/cpmpml/arc2-trm-v31?scriptVersionId=278223801).

## ARC AGI 2024

We ran our winning solution on last year's ARC AGI evaluation data. The code can be found in the [ARC-AGI1](ARC-AGI1) folder.
Run scripts with `uv run script.py -h` for help.
16 changes: 11 additions & 5 deletions SDG/scripts/generate_input_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def validate_grids(grids: list[Grid]) -> bool:
if len(grids) < 5:
if len(grids) < 1:
return False
unique_colors = set()
first_seed, first_grid = grids[0]
Expand Down Expand Up @@ -89,10 +89,16 @@ def generate_grids(inputs_mask: str, grids_prefix: str, num_grids: int, init_see
for file_name in tqdm(puzzle_files, desc="Generating input grids"):

puzzle_parts = file_name.split("/")
puzzle_name = puzzle_parts[-1][:-3]
puzzle_batch = puzzle_parts[-3]
split_name = puzzle_parts[-4]
grids_json = f"{grids_prefix}/{split_name}/{puzzle_batch}/{puzzle_name}.json"
puzzle_name = os.path.splitext(puzzle_parts[-1])[0]
# Robust path extraction
puzzle_batch = puzzle_parts[-3] if len(puzzle_parts) >= 3 else "default_batch"
split_name = puzzle_parts[-4] if len(puzzle_parts) >= 4 else "default_split"

# If paths are shallow, just put everything in a flat grids folder
if len(puzzle_parts) < 3:
grids_json = f"{grids_prefix}/{puzzle_name}.json"
else:
grids_json = f"{grids_prefix}/{split_name}/{puzzle_batch}/{puzzle_name}.json"

if os.path.exists(grids_json):
num_existing_grids += 1
Expand Down
79 changes: 47 additions & 32 deletions SDG/scripts/generate_output_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,56 +28,71 @@ def generate_output_grid(output_code: str, input_grid: Grid) -> Grid | None:

def generate_grids(solutions_mask: str, input_grids_prefix: str, output_grids_prefix: str, min_solutions_per_puzzle: int):

puzzle_files = glob.glob(solutions_mask + "/completions/0.md")
print(f"Found {len(puzzle_files)} solution puzzles in {solutions_mask}")
# If the mask already points to files (e.g. logic/*.py), use it directly
if solutions_mask.endswith(".py"):
puzzle_files = glob.glob(solutions_mask)
else:
puzzle_files = glob.glob(solutions_mask + "/completions/0.md")
print(f"Found {len(puzzle_files)} target files in {solutions_mask}")

num_skipped_puzzles = 0
num_existing_grids = 0

for file_name in tqdm(puzzle_files, desc="Generating output grids"):

puzzle_dir = os.path.dirname(os.path.dirname(file_name))
puzzle_parts = puzzle_dir.split("/")
puzzle_name = puzzle_parts[-1]
puzzle_batch = puzzle_parts[-2]
puzzle_version = puzzle_parts[-3]

grids_output_json = f"{output_grids_prefix}/{puzzle_version}/{puzzle_batch}/{puzzle_name}.json"
if os.path.exists(grids_output_json):
num_existing_grids += 1
continue

grids_input_json = f"{input_grids_prefix}/{puzzle_version}/{puzzle_batch}/{puzzle_name}.json"
if file_name.endswith(".py"):
puzzle_name = os.path.splitext(os.path.basename(file_name))[0]
puzzle_batch = "default_batch"
puzzle_version = "default_version"
puzzle_dir = os.path.dirname(file_name)
else:
puzzle_dir = os.path.dirname(os.path.dirname(file_name))
puzzle_parts = puzzle_dir.split("/")
puzzle_name = puzzle_parts[-1]
puzzle_batch = puzzle_parts[-2]
puzzle_version = puzzle_parts[-3]

if file_name.endswith(".py"):
grids_output_json = os.path.join(output_grids_prefix, f"{puzzle_name}.json")
grids_input_json = os.path.join(input_grids_prefix, f"{puzzle_name}.json")
else:
grids_output_json = os.path.join(output_grids_prefix, puzzle_version, puzzle_batch, f"{puzzle_name}.json")
grids_input_json = os.path.join(input_grids_prefix, puzzle_version, puzzle_batch, f"{puzzle_name}.json")

if not os.path.exists(grids_input_json):
num_skipped_puzzles += 1
continue

try:
with open(grids_input_json, "r") as f:
input_grids = json.load(f)
assert len(input_grids) == 30
assert len(input_grids) >= 1
except:
print(f"Error loading {grids_input_json}")
num_skipped_puzzles += 1
continue

output_codes = []
for i in range(20):
if not os.path.exists(f"{puzzle_dir}/completions/{i}.md"):
break
with open(f"{puzzle_dir}/completions/{i}.md", "r") as f:
output_code = f.read()
output_code = parse_python_code(output_code)
if output_code is None:
break
try:
output_code = remove_unused_functions(output_code)
except:
break
if "def generate_puzzle_output(" not in output_code:
break
output_codes.append(output_code)
if len(output_codes) < min_solutions_per_puzzle:
if file_name.endswith(".py"):
output_codes = [parse_python_code(open(file_name).read())]
else:
output_codes = []
for i in range(20):
if not os.path.exists(f"{puzzle_dir}/completions/{i}.md"):
break
with open(f"{puzzle_dir}/completions/{i}.md", "r") as f:
output_code = f.read()
output_code = parse_python_code(output_code)
if output_code is None:
break
try:
output_code = remove_unused_functions(output_code)
except:
break
if "def generate_puzzle_output(" not in output_code:
break
output_codes.append(output_code)

if len(output_codes) < min_solutions_per_puzzle and not file_name.endswith(".py"):
num_skipped_puzzles += 1
continue

Expand Down
16 changes: 10 additions & 6 deletions SDG/scripts/parser.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import re
import tree_sitter_python
from tree_sitter import Language, Parser, Node
from tree_sitter import Language, Parser, Node, Query, QueryCursor

PY_LANGUAGE = Language(tree_sitter_python.language())

parser = Parser()
parser.language = PY_LANGUAGE
parser = Parser(PY_LANGUAGE)

def get_value(match: dict[str, list[Node]], name: str) -> str:
    """Return the source text of the single node captured under *name*.

    Args:
        match: Capture dictionary from a tree-sitter query match, mapping
            capture names to the list of nodes captured under each name.
        name: Capture name to look up.

    Returns:
        The UTF-8 decoded source text of the captured node.
    """
    values = match[name]
    # The query patterns used in this module capture exactly one node per
    # name; more (or fewer) indicates a broken pattern, not bad input.
    assert len(values) == 1
    return values[0].text.decode("utf8")

def parse_functions(text: str) -> dict[str, str]:
query = PY_LANGUAGE.query(
query = Query(
PY_LANGUAGE,
"""
(
function_definition
Expand All @@ -23,7 +23,11 @@ def parse_functions(text: str) -> dict[str, str]:
)
names = dict()
tree = parser.parse(bytes(text, "utf8"))
for _, match in query.matches(tree.root_node):

# matches() is now used like this in newer tree-sitter versions:
# returns iterator of (pattern_index, capture_dict)
cursor = QueryCursor(query)
for _, match in cursor.matches(tree.root_node):
if match["code"][0].start_point.column != 0:
# Exclude nested functions
continue
Expand Down Expand Up @@ -72,6 +76,6 @@ def remove_unused_functions(code: str) -> str:
def parse_python_code(code: str):
    """Extract the longest fenced Python block from markdown-style text.

    Args:
        code: Text that may contain one or more ```python ... ``` fences.

    Returns:
        The stripped contents of the longest fenced block, or ``None`` when
        no fence is present (callers test for ``None`` to skip completions
        that contain no code).
    """
    codes = re.findall(r"```python(.*?)```", code, re.DOTALL)
    if not codes:
        # Bug fix: the original had an unreachable `return code.strip()`
        # immediately after this return; the dead statement is removed.
        return None
    # Completions sometimes include several fenced snippets; assume the
    # longest one is the full solution.
    longest_code = max(codes, key=len)
    return longest_code.strip()
1 change: 1 addition & 0 deletions arc_agi2_training_only/arc-agi_training_challenges.json

Large diffs are not rendered by default.

Loading