Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c647358
docs(trove): add native tool calling design spec
mathuryash5 Apr 25, 2026
0f872ac
docs(trove): add native tool calling implementation plan
mathuryash5 Apr 25, 2026
a80fc28
fix(trove): infra patches for native tool calling
mathuryash5 Apr 25, 2026
91cd92f
feat(trove): add imported_callsites helper and task_family to parse_r…
mathuryash5 Apr 25, 2026
7ffddbe
feat(trove): PBEBench-shaped few-shots and IMPORT-with-tools prompt
mathuryash5 Apr 25, 2026
5cd4fd3
feat(trove): add tools_api for native OpenAI tool calling
mathuryash5 Apr 25, 2026
06116b1
fix(trove): correct misleading 'stderr' key in tools_api error payload
mathuryash5 Apr 25, 2026
6ee331c
feat(trove): add TroVELLMClient.chat_with_tools for native tool calls
mathuryash5 Apr 25, 2026
ace6048
feat(trove): controller branch for native IMPORT tool calling
mathuryash5 Apr 25, 2026
5f1ff88
docs(trove): align TroVEController class docstring with new params
mathuryash5 Apr 25, 2026
d8a76a4
feat(trove): CLI flags --trove-selection and --trove-task-family
mathuryash5 Apr 25, 2026
a19309b
chore(launcher): enable native tool calling for gpt-oss-120b vLLM server
mathuryash5 Apr 25, 2026
8c32e0c
feat(trove): add analyze_trove_run.py for post-hoc telemetry reports
mathuryash5 Apr 25, 2026
ff6a6d8
docs(trove): rewrite deviations.md for native tool calling
mathuryash5 Apr 25, 2026
ab7b7a3
fix(trove): persist TroVE telemetry through _append_task_output
mathuryash5 Apr 25, 2026
ce75297
feat(trove): add notebooks/run_trove_pbebench.ipynb runpod runner
mathuryash5 Apr 25, 2026
e7897d4
chore(trove): target gpt-oss-20b for the TroVE smoke run
mathuryash5 Apr 25, 2026
4ce48ac
chore(trove-notebook): add tail_vllm_log helper and mirror run output…
mathuryash5 Apr 25, 2026
64a930e
fix(trove): read vLLM gpt-oss responses from reasoning field
mathuryash5 Apr 26, 2026
aff5962
fix(trove): make PBEBench prompts print replace program lists
mathuryash5 Apr 26, 2026
83528d5
fix(trove): prefer reusable candidates on reward ties
mathuryash5 Apr 26, 2026
94bc0d1
fix(trove): encourage reusable PBEBench helpers
mathuryash5 Apr 27, 2026
4352d31
fix(trove): show PBEBench helper signatures in CREATE prompt
mathuryash5 Apr 29, 2026
51edc0c
chore(trove): remove superpowers planning docs
mathuryash5 Apr 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,22 @@ def _append_task_output(result: dict, task_index: int, output_file: str) -> None
"token_usage": result.get("token_usage", {}),
"agent_messages": result.get("agent_messages", []),
}
# TroVE telemetry: pass these keys through when present so that
# scripts/analyze_trove_run.py (and any other post-hoc analyzer) can read
# per-task tool-use stats and the final library state from the JSONL.
# The keys are absent on non-TroVE runs.
for key in (
"won_mode",
"import_eligible",
"import_was_winner",
"tool_calls",
"tool_call_count",
"tools_called",
"actually_called",
"trove_stopped_reason",
"library_snapshot",
):
if key in result:
record[key] = result[key]
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
with open(output_file, "a", encoding="utf-8") as f:
f.write(json.dumps(record, default=str) + "\n")
Expand Down Expand Up @@ -808,6 +824,23 @@ def main() -> None:
help="[TroVE] Trim low-frequency toolbox functions every N tasks. "
"Paper default: 500. Set to 9999 to disable for small datasets. (default: 500)",
)
parser.add_argument(
"--trove-selection",
choices=["reward", "consistency"],
default="reward",
help="[TroVE] Candidate selection strategy. 'reward' (default) uses "
"the per-task reward function with AST tie-breaking. "
"'consistency' uses the original TroVE majority-vote algorithm. "
"(default: reward)",
)
parser.add_argument(
"--trove-task-family",
choices=["default", "pbebench"],
default="default",
help="[TroVE] Task family for prompt selection and parser strictness. "
"'pbebench' uses PBEBench-shaped few-shots and strict **Solution** "
"parsing (no fallback to any python block). (default: default)",
)
# ReGAL-specific flags
parser.add_argument(
"--regal-train-file",
Expand Down Expand Up @@ -1007,8 +1040,13 @@ def main() -> None:
debug_dir=args.debug_dir,
k=args.trove_k,
trim_every=args.trove_trim_every,
task_family=args.trove_task_family,
selection=args.trove_selection,
)
logger.info(
"Framework: TroVE (k=%d, trim_every=%d, task_family=%s, selection=%s)",
args.trove_k, args.trove_trim_every, args.trove_task_family, args.trove_selection,
)
logger.info("Framework: TroVE (k=%d, trim_every=%d)", args.trove_k, args.trove_trim_every)
elif args.framework == "regal":
from pathlib import Path as _Path
controller = ReGALController(
Expand Down
Loading