Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
233 changes: 233 additions & 0 deletions .github/scripts/check_settings_schema_migrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""Require SDK settings migrations for incompatible persisted schema changes."""

from __future__ import annotations

import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any


REPO_ROOT = Path(__file__).resolve().parents[2]

SCHEMA_PROGRAM = r"""
import json
import sys
from pathlib import Path

source_tree = Path(sys.argv[1])
sys.path = [str(source_tree / path) for path in (
"openhands-sdk",
"openhands-tools",
"openhands-workspace",
"openhands-agent-server",
)] + sys.path

from openhands.sdk.settings import ConversationSettings, export_agent_settings_schema
import openhands.sdk.settings.model as settings_model

print(json.dumps({
"agent_schema": export_agent_settings_schema().model_dump(mode="json"),
"conversation_schema": ConversationSettings.export_schema().model_dump(mode="json"),
"agent_version": settings_model.AGENT_SETTINGS_SCHEMA_VERSION,
"conversation_version": settings_model.CONVERSATION_SETTINGS_SCHEMA_VERSION,
"agent_migrations": sorted(settings_model._AGENT_SETTINGS_MIGRATIONS),
"conversation_migrations": sorted(settings_model._CONVERSATION_SETTINGS_MIGRATIONS),
}))
"""


def _run(command: list[str], *, cwd: Path = REPO_ROOT) -> subprocess.CompletedProcess:
return subprocess.run(command, cwd=cwd, capture_output=True, text=True)


def _candidate_refs(ref: str) -> list[str]:
candidates = [ref]
if not ref.startswith("origin/"):
candidates.insert(0, f"origin/{ref}")
return list(dict.fromkeys(candidates))


def _archive_ref(ref: str) -> Path | None:
for candidate in _candidate_refs(ref):
archive = subprocess.run(
["git", "archive", candidate],
cwd=REPO_ROOT,
capture_output=True,
)
if archive.returncode != 0:
continue

source_tree = Path(tempfile.mkdtemp(prefix="settings-schema-base-src-"))
extract = subprocess.run(
["tar", "-x", "-C", str(source_tree)],
input=archive.stdout,
capture_output=True,
)
if extract.returncode == 0:
return source_tree
return None


def _capture(source_tree: Path) -> dict[str, Any]:
result = _run([sys.executable, "-c", SCHEMA_PROGRAM, str(source_tree)])
if result.returncode != 0:
raise RuntimeError(result.stderr[-2000:] or result.stdout[-2000:])
return json.loads(result.stdout)


def _flatten_fields(schema: dict[str, Any]) -> dict[str, dict[str, Any]]:
fields: dict[str, dict[str, Any]] = {}
for section in schema.get("sections", []):
section_variant = section.get("variant")
for field in section.get("fields", []):
variant = field.get("variant") or section_variant or "*"
field_id = f"{variant}:{field.get('key')}"
fields[field_id] = field
return fields


def _choice_values(field: dict[str, Any]) -> set[Any]:
return {choice.get("value") for choice in field.get("choices", [])}


def _incompatible_changes(
old_schema: dict[str, Any],
new_schema: dict[str, Any],
) -> list[str]:
old_fields = _flatten_fields(old_schema)
new_fields = _flatten_fields(new_schema)

changes: list[str] = []
for field_id, old_field in sorted(old_fields.items()):
new_field = new_fields.get(field_id)
if new_field is None:
changes.append(f"removed field {field_id}")
continue

if old_field.get("value_type") != new_field.get("value_type"):
changes.append(
"changed field type "
f"{field_id}: {old_field.get('value_type')} -> "
f"{new_field.get('value_type')}"
)

removed_choices = _choice_values(old_field) - _choice_values(new_field)
if removed_choices:
changes.append(
f"removed choices from {field_id}: {sorted(removed_choices)!r}"
)

return changes


def _check_model(
*,
name: str,
old_info: dict[str, Any],
new_info: dict[str, Any],
schema_key: str,
version_key: str,
migrations_key: str,
) -> list[str]:
errors: list[str] = []
changes = _incompatible_changes(old_info[schema_key], new_info[schema_key])
old_version = int(old_info[version_key])
new_version = int(new_info[version_key])
migrations = set(new_info[migrations_key])

if new_version < old_version:
errors.append(
f"{name} schema version went backwards: {old_version} -> {new_version}"
)

if not changes:
return errors

print(f"{name} incompatible settings schema changes:")
for change in changes:
print(f" - {change}")

if new_version <= old_version:
errors.append(
f"{name} has incompatible settings schema changes but "
f"{version_key} did not increase ({old_version} -> {new_version})."
)
return errors

missing = [
version
for version in range(old_version, new_version)
if version not in migrations
]
if missing:
errors.append(
f"{name} schema version increased ({old_version} -> {new_version}) "
f"but migrations are missing for version(s): {missing}."
)

return errors


def _base_ref() -> str:
return (
os.environ.get("SETTINGS_SCHEMA_BASE_REF")
or os.environ.get("GITHUB_BASE_REF")
or "origin/main"
)


def main() -> int:
base_ref = _base_ref()
base_tree = _archive_ref(base_ref)
if base_tree is None:
print(
f"::warning title=Settings schema::Unable to read base ref {base_ref}; "
"skipping settings schema migration check."
)
return 0

try:
old_info = _capture(base_tree)
new_info = _capture(REPO_ROOT)
except Exception as exc:
print(f"::error title=Settings schema::Failed to capture schemas: {exc}")
return 1

errors: list[str] = []
errors.extend(
_check_model(
name="AgentSettings",
old_info=old_info,
new_info=new_info,
schema_key="agent_schema",
version_key="agent_version",
migrations_key="agent_migrations",
)
)
errors.extend(
_check_model(
name="ConversationSettings",
old_info=old_info,
new_info=new_info,
schema_key="conversation_schema",
version_key="conversation_version",
migrations_key="conversation_migrations",
)
)

if errors:
for error in errors:
print(f"::error title=Settings schema::{error}")
return 1

print("Settings schema migration check passed.")
return 0


if __name__ == "__main__":
raise SystemExit(main())
4 changes: 4 additions & 0 deletions .github/workflows/api-breakage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ jobs:
enable-cache: true
- name: Install workspace deps (dev)
run: uv sync --frozen --group dev
- name: Run settings schema migration check
env:
SETTINGS_SCHEMA_BASE_REF: ${{ github.event_name == 'pull_request' && github.base_ref || github.event.before }}
run: uv run python .github/scripts/check_settings_schema_migrations.py
- name: Run Python API breakage check
id: api_breakage
# Let this step fail so CI is visibly red on breakage.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from openhands.sdk.settings import (
AgentSettingsConfig,
ConversationSettings,
apply_agent_settings_diff,
default_agent_settings,
validate_agent_settings,
)
Expand Down Expand Up @@ -146,7 +147,11 @@ class PersistedSettings(BaseModel):
),
)

model_config = ConfigDict(populate_by_name=True)
model_config = ConfigDict(
populate_by_name=True,
validate_assignment=True,
revalidate_instances="always",
)

@property
def llm_api_key_is_set(self) -> bool:
Expand Down Expand Up @@ -197,42 +202,14 @@ def update(self, payload: SettingsUpdatePayload) -> None:
# Phase 1: Validate all updates before any mutations
new_agent: AgentSettingsConfig | None = None
new_conv: ConversationSettings | None = None
agent_merged: dict | None = None
conv_merged: dict | None = None

try:
if isinstance(agent_update, dict):
# Check if this is a variant (agent_kind) switch
old_kind = self.agent_settings.agent_kind
new_kind = agent_update.get("agent_kind")
is_kind_switch = new_kind is not None and new_kind != old_kind

if is_kind_switch:
# Variant replacement: validate the diff as-is rather than
# deep-merging it onto the old variant. A kind switch picks a
# different member of the AgentSettingsConfig union, and the
# old variant's serialized fields are not a valid base for the
# new one (e.g. ACP's acp_command has no place in
# OpenHandsAgentSettings and would fail validation).
#
# Consequence (intentional): fields the two variants happen to
# share (e.g. ``llm``) are NOT carried over — they fall back to
# the new variant's defaults unless the caller restates them in
# this same diff. Switching kinds is a fresh start on the new
# variant, mirroring the frontend's "fresh base on kind switch"
# behaviour. Callers that want to preserve a shared field must
# include it in the switch payload.
agent_merged = agent_update
else:
# Same-kind update: deep-merge for incremental field edits
agent_merged = _deep_merge(
self.agent_settings.model_dump(
mode="json", context={"expose_secrets": "plaintext"}
),
agent_update,
)
try:
new_agent = validate_agent_settings(agent_merged)
new_agent = apply_agent_settings_diff(
self.agent_settings, agent_update
)
except Exception as e:
# Use 'from None' to break exception chain - the original
# exception may contain secret values in Pydantic errors
Expand Down Expand Up @@ -276,8 +253,6 @@ def update(self, payload: SettingsUpdatePayload) -> None:
self.active_profile = payload["active_profile"]
finally:
# Clear merged dicts to minimize plaintext exposure window
if agent_merged is not None:
agent_merged.clear()
if conv_merged is not None:
conv_merged.clear()

Expand Down
27 changes: 27 additions & 0 deletions openhands-agent-server/openhands/agent_server/persistence/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,33 @@ def save(self, settings: PersistedSettings) -> None:
If no cipher is provided, secrets are stored in plaintext.
This is logged as a security warning on first save.
"""
if not isinstance(settings, PersistedSettings):
raise TypeError("FileSettingsStore.save() requires PersistedSettings")

# Revalidate at the persistence boundary. Pydantic assignment or
# model_construct() can bypass normal field validation; the store must
# never serialize that malformed state to disk.
#
# We validate in two steps because each step catches a different class
# of bypass:
# 1. ``model_validate(settings)`` rejects raw-dict payloads sitting in
# slots typed as a sub-model (e.g. ``model_construct`` was called
# with ``agent_settings={"agent_kind": "bogus"}``). It does *not*
# revalidate already-instantiated sub-model instances.
# 2. ``model_validate(model_dump(...))`` then revalidates nested
# sub-models that *were* constructed via ``model_construct()`` —
# Pydantic v2 treats them as opaque on the first pass, so a
# ``ConversationSettings.model_construct(max_iterations=-1)`` would
# slip through step 1 alone.
#
# The dump must use ``expose_secrets="plaintext"`` so SecretStr fields
# round-trip without being redacted/encrypted by ``serialize_secret``;
# the real encrypt-or-redact dump happens below for the actual write.
settings = PersistedSettings.model_validate(settings)
Comment thread
neubig marked this conversation as resolved.
settings = PersistedSettings.model_validate(
Comment thread
neubig marked this conversation as resolved.
settings.model_dump(context={"expose_secrets": "plaintext"})
)

_ensure_secure_directory(self.persistence_dir)

# Pass cipher in context for automatic encryption of all secret fields
Expand Down
Loading
Loading