Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion roboclaw/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Agent core module."""

from roboclaw.agent.context import ContextBuilder
from roboclaw.agent.experience import ExperienceStore
from roboclaw.agent.loop import AgentLoop
from roboclaw.agent.memory import MemoryStore
from roboclaw.agent.skills import SkillsLoader

__all__ = ["AgentLoop", "ContextBuilder", "MemoryStore", "SkillsLoader"]
__all__ = ["AgentLoop", "ContextBuilder", "ExperienceStore", "MemoryStore", "SkillsLoader"]
15 changes: 13 additions & 2 deletions roboclaw/agent/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path
from typing import Any

from roboclaw.agent.experience import ExperienceStore
from roboclaw.utils.helpers import current_time_str

from roboclaw.agent.memory import MemoryStore
Expand All @@ -22,9 +23,15 @@ class ContextBuilder:
def __init__(self, workspace: Path):
self.workspace = workspace
self.memory = MemoryStore(workspace)
self.experiences = ExperienceStore(workspace)
self.skills = SkillsLoader(workspace)

def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
def build_system_prompt(
self,
skill_names: list[str] | None = None,
*,
current_message: str = "",
) -> str:
"""Build the system prompt from identity, bootstrap files, memory, and skills."""
parts = [self._get_identity()]

Expand All @@ -36,6 +43,10 @@ def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
if memory:
parts.append(f"# Memory\n\n{memory}")

experience_context = self.experiences.build_context(query=current_message)
if experience_context:
parts.append(f"# Relevant Experience\n\n{experience_context}")

always_skills = self.skills.get_always_skills()
if always_skills:
always_content = self.skills.load_skills_for_context(always_skills)
Expand Down Expand Up @@ -138,7 +149,7 @@ def build_messages(
merged = [{"type": "text", "text": runtime_ctx}] + user_content

return [
{"role": "system", "content": self.build_system_prompt(skill_names)},
{"role": "system", "content": self.build_system_prompt(skill_names, current_message=current_message)},
*history,
{"role": "user", "content": merged},
]
Expand Down
240 changes: 240 additions & 0 deletions roboclaw/agent/experience.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
"""Structured experience storage and retrieval for agent adaptation."""

from __future__ import annotations

from dataclasses import asdict, dataclass
from datetime import UTC, datetime
import json
from pathlib import Path
import re
from typing import Any

from roboclaw.utils.helpers import ensure_dir

_TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_./:-]*")


def _normalize_text(value: str | None) -> str:
return (value or "").strip()


def _normalize_key(value: str | None) -> str:
return _normalize_text(value).lower()


def _tokenize(*values: str) -> set[str]:
tokens: set[str] = set()
for value in values:
for match in _TOKEN_RE.finditer(value.lower()):
token = match.group(0)
if len(token) >= 2:
tokens.add(token)
return tokens


@dataclass(frozen=True)
class ExperienceRecord:
timestamp: str
task_type: str
summary: str
outcome: str
lesson: str = ""
dataset: str = ""
replay_datasets: str = ""
policy: str = ""
provider: str = ""
job_id: str = ""
source: str = ""
error: str = ""
checkpoint_path: str = ""
dataset_path: str = ""
task_name: str = ""

@classmethod
def create(
cls,
*,
task_type: str,
summary: str,
outcome: str,
lesson: str = "",
dataset: str = "",
replay_datasets: str = "",
policy: str = "",
provider: str = "",
job_id: str = "",
source: str = "",
error: str = "",
checkpoint_path: str = "",
dataset_path: str = "",
task_name: str = "",
) -> "ExperienceRecord":
return cls(
timestamp=datetime.now(UTC).isoformat(),
task_type=_normalize_text(task_type),
summary=_normalize_text(summary),
outcome=_normalize_text(outcome),
lesson=_normalize_text(lesson),
dataset=_normalize_text(dataset),
replay_datasets=_normalize_text(replay_datasets),
policy=_normalize_text(policy),
provider=_normalize_text(provider),
job_id=_normalize_text(job_id),
source=_normalize_text(source),
error=_normalize_text(error),
checkpoint_path=_normalize_text(checkpoint_path),
dataset_path=_normalize_text(dataset_path),
task_name=_normalize_text(task_name),
)

@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "ExperienceRecord":
return cls(
timestamp=_normalize_text(str(payload.get("timestamp") or "")),
task_type=_normalize_text(str(payload.get("task_type") or "")),
summary=_normalize_text(str(payload.get("summary") or "")),
outcome=_normalize_text(str(payload.get("outcome") or "")),
lesson=_normalize_text(str(payload.get("lesson") or "")),
dataset=_normalize_text(str(payload.get("dataset") or "")),
replay_datasets=_normalize_text(str(payload.get("replay_datasets") or "")),
policy=_normalize_text(str(payload.get("policy") or "")),
provider=_normalize_text(str(payload.get("provider") or "")),
job_id=_normalize_text(str(payload.get("job_id") or "")),
source=_normalize_text(str(payload.get("source") or "")),
error=_normalize_text(str(payload.get("error") or "")),
checkpoint_path=_normalize_text(str(payload.get("checkpoint_path") or "")),
dataset_path=_normalize_text(str(payload.get("dataset_path") or "")),
task_name=_normalize_text(str(payload.get("task_name") or "")),
)

def to_dict(self) -> dict[str, Any]:
return asdict(self)

def fingerprint(self) -> str:
parts = (
_normalize_key(self.task_type),
_normalize_key(self.summary),
_normalize_key(self.outcome),
_normalize_key(self.dataset),
_normalize_key(self.replay_datasets),
_normalize_key(self.policy),
_normalize_key(self.provider),
_normalize_key(self.job_id),
_normalize_key(self.error),
_normalize_key(self.lesson),
)
return "|".join(parts)


class ExperienceStore:
"""Append-only JSONL store plus lightweight experience retrieval."""

def __init__(self, workspace: Path):
self.memory_dir = ensure_dir(workspace / "memory")
self.experience_file = self.memory_dir / "EXPERIENCES.jsonl"

def read_all(self) -> list[ExperienceRecord]:
if not self.experience_file.exists():
return []
records: list[ExperienceRecord] = []
for raw_line in self.experience_file.read_text(encoding="utf-8").splitlines():
line = raw_line.strip()
if not line:
continue
try:
payload = json.loads(line)
except json.JSONDecodeError:
continue
if isinstance(payload, dict):
records.append(ExperienceRecord.from_dict(payload))
return records

def append(self, record: ExperienceRecord) -> bool:
records = self.read_all()
fingerprint = record.fingerprint()
if any(existing.fingerprint() == fingerprint for existing in records):
return False
with self.experience_file.open("a", encoding="utf-8") as handle:
handle.write(json.dumps(record.to_dict(), ensure_ascii=False) + "\n")
return True

def search(
self,
*,
query: str = "",
task_type: str = "",
dataset: str = "",
policy: str = "",
provider: str = "",
outcomes: frozenset[str] | None = None,
limit: int = 3,
) -> list[ExperienceRecord]:
query_tokens = _tokenize(query, dataset, policy, provider, task_type)
scored: list[tuple[int, ExperienceRecord]] = []
for record in self.read_all():
if outcomes is not None and record.outcome not in outcomes:
continue
score = 0
if task_type and _normalize_key(record.task_type) == _normalize_key(task_type):
score += 6
if dataset and _normalize_key(record.dataset) == _normalize_key(dataset):
score += 8
if policy and _normalize_key(record.policy) == _normalize_key(policy):
score += 6
if provider and _normalize_key(record.provider) == _normalize_key(provider):
score += 4

record_tokens = _tokenize(
record.summary,
record.lesson,
record.dataset,
record.policy,
record.provider,
record.task_name,
record.error,
)
score += len(query_tokens & record_tokens)
if score <= 0:
continue
scored.append((score, record))

scored.sort(key=lambda item: (item[0], item[1].timestamp), reverse=True)
return [record for _, record in scored[:limit]]

def build_context(
self,
*,
query: str = "",
task_type: str = "",
dataset: str = "",
policy: str = "",
provider: str = "",
limit: int = 3,
) -> str:
records = self.search(
query=query,
task_type=task_type,
dataset=dataset,
policy=policy,
provider=provider,
limit=limit,
)
if not records:
return ""

lines = [
"Use these past outcomes as hints. Reuse what worked and avoid repeating failures."
]
for record in records:
fields = [record.outcome]
if record.dataset:
fields.append(f"dataset={record.dataset}")
if record.replay_datasets:
fields.append(f"replay={record.replay_datasets}")
if record.policy:
fields.append(f"policy={record.policy}")
if record.provider:
fields.append(f"provider={record.provider}")
summary = record.lesson or record.summary
lines.append(f"- [{record.timestamp[:19]}] {'; '.join(fields)} -> {summary}")
return "\n".join(lines)
35 changes: 13 additions & 22 deletions roboclaw/embodied/command/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)
from roboclaw.embodied.embodiment.arm.registry import get_model
from roboclaw.embodied.embodiment.manifest.binding import ArmBinding, ArmRole, CameraBinding
from roboclaw.embodied.policy import policy_registry

_BIMANUAL: dict[str, tuple[str, str]] = {
"so101": ("bi_so_follower", "bi_so_leader"),
Expand All @@ -28,23 +29,7 @@

_DEFAULT_REPLAY_ROOT = Path("~/.cache/huggingface/lerobot").expanduser()

TRAIN_POLICY_TYPES = {
"act",
"diffusion",
"groot",
"multi_task_dit",
"pi0",
"pi0_fast",
"pi05",
"reward_classifier",
"sac",
"sarm",
"smolvla",
"tdmpc",
"vqbet",
"wall_x",
"xvla",
}
TRAIN_POLICY_TYPES = policy_registry.supported_types()


# ── Private helper functions ─────────────────────────────────────────────
Expand Down Expand Up @@ -288,26 +273,32 @@ def train(
device: str = "cuda",
) -> list[str]:
"""Build training argv (standalone lerobot-train, not through wrapper)."""
if policy_type not in TRAIN_POLICY_TYPES:
allowed = ", ".join(sorted(TRAIN_POLICY_TYPES))
raise ActionError(f"Unsupported policy_type '{policy_type}'. Expected one of: {allowed}.")
try:
policy_config = policy_registry.get(policy_type)
except ValueError as exc:
raise ActionError(str(exc)) from exc

policies_root = manifest.snapshot.get("policies", {}).get("root", "")
output_dir_name = dataset.name if policy_type == "act" else f"{dataset.name}_{policy_type}"
output_dir_name = (
dataset.name
if policy_config.policy_type == "act"
else f"{dataset.name}_{policy_config.policy_type}"
)
output_dir = Path(policies_root).expanduser() / output_dir_name

argv = [
"lerobot-train",
f"--dataset.repo_id={dataset.repo_id}",
f"--dataset.root={dataset.local_path}",
"--dataset.video_backend=pyav",
f"--policy.type={policy_type}",
f"--policy.type={policy_config.policy_type}",
"--policy.push_to_hub=false",
f"--policy.repo_id={dataset.repo_id}",
f"--output_dir={output_dir}",
f"--steps={steps}",
f"--policy.device={device}",
]
argv.extend(policy_config.extra_train_args())

# Resume if a previous checkpoint exists
if output_dir.is_dir():
Expand Down
17 changes: 17 additions & 0 deletions roboclaw/embodied/policy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Policy config registry for embodied training."""

from roboclaw.embodied.policy.base import BasePolicyConfig
from roboclaw.embodied.policy.registry import PolicyRegistry, policy_registry

from . import act as _act
from . import diffusion as _diffusion
from . import gr00t as _gr00t
from . import pi0 as _pi0
from . import smolvla as _smolvla

__all__ = [
"BasePolicyConfig",
"PolicyRegistry",
"policy_registry",
]

Loading
Loading