From 20a0cc09851b5f2e85faabbbe8e126f61e0d0025 Mon Sep 17 00:00:00 2001 From: cgasgarth <64235119+cgasgarth@users.noreply.github.com> Date: Sun, 12 Apr 2026 15:19:17 -0500 Subject: [PATCH 1/2] Add canonical rotate actions Let the agent turn images left or right through the clipping angle control so rotation requests can be handled without raw action paths. --- server/codex_bridge/canonical_binder.py | 24 +++++++ server/codex_bridge/prompts/turn_prompt.j2 | 4 +- server/evals/fixtures.py | 11 ++++ server/tests/test_codex_app_server.py | 76 +++++++++++++++++++++- shared/canonical_plan.py | 4 ++ 5 files changed, 116 insertions(+), 3 deletions(-) diff --git a/server/codex_bridge/canonical_binder.py b/server/codex_bridge/canonical_binder.py index 7b19f14..5b35c70 100644 --- a/server/codex_bridge/canonical_binder.py +++ b/server/codex_bridge/canonical_binder.py @@ -113,6 +113,8 @@ def _bind_action( return _bind_noise(settings, action) if action.action == "grade-color": return _bind_grade(settings, action) + if action.action in {"rotate-left", "rotate-right"}: + return _bind_rotate(settings, action) if action.action == "crop-normalized": return _bind_crop(settings, action) if action.action == "crop-to-bounding-box": @@ -316,6 +318,28 @@ def _bind_grade( ) +def _bind_rotate( + settings: list[EditableSetting], action: CanonicalEditAction +) -> _BindingResult: + rotation_delta = -90.0 if action.action == "rotate-left" else 90.0 + setting = _find_setting( + settings, + kind="set-float", + exact_action_paths=("iop/clipping/angle", "iop/crop/angle"), + module_ids=("clipping", "crop"), + action_keywords=("angle", "rotate"), + label_keywords=("angle", "rotation"), + ) + if setting is None: + return _BindingResult( + [], [f"{action.action} could not find a rotation control"] + ) + return _BindingResult( + [_float_operation(setting, rotation_delta, action.rationale)], + [], + ) + + def _bind_crop( settings: list[EditableSetting], action: CanonicalEditAction ) -> _BindingResult: diff --git a/server/codex_bridge/prompts/turn_prompt.j2 b/server/codex_bridge/prompts/turn_prompt.j2 index 2999c2a..04c135e 100644 --- a/server/codex_bridge/prompts/turn_prompt.j2 +++ b/server/codex_bridge/prompts/turn_prompt.j2 @@ -29,8 +29,8 @@ Tool usage: - Before finalizing, consider whether additional provided controls would materially improve tone, color, detail, crop, or noise; do not stop at basic exposure/contrast edits when stronger supported tools are available. - Always optimize toward refinement.goalText. {% if live_run_enabled %}Live run mode is enabled: use apply_operations for iterative edits inside this same run. -For this multi-turn path, you may pass `canonicalActions` to apply_operations instead of raw operations for these supported intent-level edits: `adjust-exposure`, `adjust-white-balance`, `recover-highlights`, `reduce-noise`, `grade-color`, `crop-normalized`, `crop-to-bounding-box`. -Canonical fields: `adjust-exposure` uses `exposureEv`; `adjust-white-balance` uses `temperatureDelta`, `tintDelta`, and/or `presetChoiceId`; `recover-highlights` and `reduce-noise` use `strength`; `grade-color` uses `target` + `amount`; `crop-normalized` uses `left`, `top`, `right`, `bottom` in normalized [0,1] coordinates. +For this multi-turn path, you may pass `canonicalActions` to apply_operations instead of raw operations for these supported intent-level edits: `adjust-exposure`, `adjust-white-balance`, `recover-highlights`, `reduce-noise`, `grade-color`, `rotate-left`, `rotate-right`, `crop-normalized`, `crop-to-bounding-box`. +Canonical fields: `adjust-exposure` uses `exposureEv`; `adjust-white-balance` uses `temperatureDelta`, `tintDelta`, and/or `presetChoiceId`; `recover-highlights` and `reduce-noise` use `strength`; `grade-color` uses `target` + `amount`; `rotate-left` and `rotate-right` require no extra fields and rotate 90 degrees; `crop-normalized` uses `left`, `top`, `right`, `bottom` in normalized [0,1] coordinates. For subject-centric crops, prefer `crop-to-bounding-box` over raw crop edges: provide `boxLeft`, `boxTop`, `boxWidth`, `boxHeight`, and optional `paddingRatio`, and the runtime will deterministically translate that box into concrete crop/clipping controls. The runtime binds supported canonical actions to concrete darktable controls deterministically before applying them. Inside each apply_operations call, operations are auto-applied one at a time with a fresh render after each step. diff --git a/server/evals/fixtures.py b/server/evals/fixtures.py index 9b7751a..a9d9450 100644 --- a/server/evals/fixtures.py +++ b/server/evals/fixtures.py @@ -207,6 +207,17 @@ def editable_settings() -> list[dict[str, object]]: minimum=-3.14, maximum=3.14, ), + float_setting( + module_id="clipping", + module_label="crop and rotate", + setting_id="setting.clipping.angle", + capability_id="clipping.angle", + label="angle", + action_path="iop/clipping/angle", + current=0.0, + minimum=-180.0, + maximum=180.0, + ), float_setting( module_id="clipping", module_label="crop and rotate", diff --git a/server/tests/test_codex_app_server.py b/server/tests/test_codex_app_server.py index 797ffd9..fa96932 100644 --- a/server/tests/test_codex_app_server.py +++ b/server/tests/test_codex_app_server.py @@ -342,6 +342,20 @@ def _sample_request_with_white_balance_controls() -> RequestEnvelope: def _sample_request_with_canonical_controls() -> RequestEnvelope: payload = _sample_request_with_white_balance_controls().model_dump(mode="json") extra_targets = [ + { + "moduleId": "clipping", + "moduleLabel": "crop", + "capabilityId": "clipping.angle", + "label": "angle", + "kind": "set-float", + "targetType": "darktable-action", + "actionPath": "iop/clipping/angle", + "supportedModes": ["set", "delta"], + "minNumber": -180.0, + "maxNumber": 180.0, + "defaultNumber": 0.0, + "stepNumber": 1.0, + }, { "moduleId": "clipping", "moduleLabel": "crop", @@ -428,6 +442,21 @@ def _sample_request_with_canonical_controls() -> RequestEnvelope: }, ] extra_settings = [ + { + "moduleId": "clipping", + "moduleLabel": "crop", + "settingId": "setting.clipping.angle", + "capabilityId": "clipping.angle", + "label": "angle", + "actionPath": "iop/clipping/angle", + "kind": "set-float", + "currentNumber": 0.0, + "supportedModes": ["set", "delta"], + "minNumber": -180.0, + "maxNumber": 180.0, + "defaultNumber": 0.0, + "stepNumber": 1.0, + }, { "moduleId": "clipping", "moduleLabel": "crop", @@ -854,6 +883,8 @@ def test_turn_prompt_tells_codex_to_infer_broad_edit_plan_from_visual_context() assert "you may pass `canonicalActions` to apply_operations" in prompt assert "adjust-exposure" in prompt assert "grade-color" in prompt + assert "rotate-left" in prompt + assert "rotate-right" in prompt assert "crop-to-bounding-box" in prompt assert "boxLeft" in prompt assert "paddingRatio" in prompt @@ -982,6 +1013,10 @@ def test_canonical_binder_resolves_supported_actions_without_raw_ids() -> None: "bottom": 0.95, "rationale": "Tighten framing.", }, + { + "action": "rotate-right", + "rationale": "Turn the frame clockwise.", + }, ], } ) @@ -1001,10 +1036,43 @@ def test_canonical_binder_resolves_supported_actions_without_raw_ids() -> None: "iop/clipping/cy", "iop/clipping/cw", "iop/clipping/ch", + "iop/clipping/angle", ] assert bound_plan.operations[0].value.mode == "delta" assert bound_plan.operations[6].value.mode == "set" assert bound_plan.operations[6].value.number == pytest.approx(0.1) + assert bound_plan.operations[10].value.mode == "delta" + assert bound_plan.operations[10].value.number == pytest.approx(90.0) + + +def test_canonical_binder_binds_rotate_actions_to_clipping_angle() -> None: + request = _sample_request_with_canonical_controls() + plan = AgentPlan.model_validate( + { + "assistantText": "Rotate the image.", + "continueRefining": False, + "operations": [], + "canonicalActions": [ + {"action": "rotate-left"}, + {"action": "rotate-right"}, + ], + } + ) + + bound_plan = bind_canonical_plan(request, plan) + + assert [operation.target.actionPath for operation in bound_plan.operations] == [ + "iop/clipping/angle", + "iop/clipping/angle", + ] + assert [operation.value.mode for operation in bound_plan.operations] == [ + "delta", + "delta", + ] + assert [operation.value.number for operation in bound_plan.operations] == [ + pytest.approx(-90.0), + pytest.approx(90.0), + ] def test_canonical_binder_translates_bounding_box_crop_to_crop_controls() -> None: @@ -1839,6 +1907,9 @@ def _mock_wait(timeout=None, *, context=turn_context): "right": 0.9, "bottom": 0.9, }, + { + "action": "rotate-right", + }, ] }, }, @@ -1847,7 +1918,7 @@ def _mock_wait(timeout=None, *, context=turn_context): result = sent_payloads[0]["result"] assert result["success"] is True - assert "Applied 5 operations" in result["contentItems"][0]["text"] + assert "Applied 6 operations" in result["contentItems"][0]["text"] turn_context = bridge._get_turn_context("thread-1", "turn-1") # type: ignore[attr-defined] assert turn_context is not None assert turn_context.setting_by_id["setting.exposure.primary"][ @@ -1856,6 +1927,9 @@ def _mock_wait(timeout=None, *, context=turn_context): assert turn_context.setting_by_id["setting.clipping.cx"][ "currentNumber" ] == pytest.approx(0.1) + assert turn_context.setting_by_id["setting.clipping.angle"][ + "currentNumber" + ] == pytest.approx(90.0) finally: bridge._clear_turn_context("thread-1", "turn-1") # type: ignore[attr-defined] diff --git a/shared/canonical_plan.py b/shared/canonical_plan.py index c1516c6..c879594 100644 --- a/shared/canonical_plan.py +++ b/shared/canonical_plan.py @@ -10,6 +10,8 @@ "recover-highlights", "reduce-noise", "grade-color", + "rotate-left", + "rotate-right", "crop-normalized", "crop-to-bounding-box", ] @@ -75,6 +77,8 @@ def validate_action_shape(self) -> "CanonicalEditAction": raise ValueError("grade-color requires target") if self.amount is None: raise ValueError("grade-color requires amount") + elif self.action in {"rotate-left", "rotate-right"}: + pass elif self.action == "crop-normalized": bounds = (self.left, self.top, self.right, self.bottom) if any(value is None for value in bounds): From a3e2cfba4ee6d2f4ade3cfe35d710741cad04b6e Mon Sep 17 00:00:00 2001 From: cgasgarth <64235119+cgasgarth@users.noreply.github.com> Date: Sun, 12 Apr 2026 15:50:16 -0500 Subject: [PATCH 2/2] Use precise canonical rotation amounts Replace the fixed left/right 90 degree rotation shortcuts with a signed angleDegrees field so the agent can request small, exact clipping angle adjustments. --- server/codex_bridge/canonical_binder.py | 10 +++--- server/codex_bridge/prompts/turn_prompt.j2 | 4 +-- server/tests/test_codex_app_server.py | 36 +++++++++++++++------- shared/canonical_plan.py | 9 +++--- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/server/codex_bridge/canonical_binder.py b/server/codex_bridge/canonical_binder.py index 5b35c70..4640f3b 100644 --- a/server/codex_bridge/canonical_binder.py +++ b/server/codex_bridge/canonical_binder.py @@ -113,7 +113,7 @@ def _bind_action( return _bind_noise(settings, action) if action.action == "grade-color": return _bind_grade(settings, action) - if action.action in {"rotate-left", "rotate-right"}: + if action.action == "rotate": return _bind_rotate(settings, action) if action.action == "crop-normalized": return _bind_crop(settings, action) @@ -321,7 +321,6 @@ def _bind_grade( def _bind_rotate( settings: list[EditableSetting], action: CanonicalEditAction ) -> _BindingResult: - rotation_delta = -90.0 if action.action == "rotate-left" else 90.0 setting = _find_setting( settings, kind="set-float", @@ -331,11 +330,10 @@ def _bind_rotate( label_keywords=("angle", "rotation"), ) if setting is None: - return _BindingResult( - [], [f"{action.action} could not find a rotation control"] - ) + return _BindingResult([], ["rotate could not find a rotation control"]) + assert action.angleDegrees is not None return _BindingResult( - [_float_operation(setting, rotation_delta, action.rationale)], + [_float_operation(setting, action.angleDegrees, action.rationale)], [], ) diff --git a/server/codex_bridge/prompts/turn_prompt.j2 b/server/codex_bridge/prompts/turn_prompt.j2 index 04c135e..5f4947c 100644 --- a/server/codex_bridge/prompts/turn_prompt.j2 +++ b/server/codex_bridge/prompts/turn_prompt.j2 @@ -29,8 +29,8 @@ Tool usage: - Before finalizing, consider whether additional provided controls would materially improve tone, color, detail, crop, or noise; do not stop at basic exposure/contrast edits when stronger supported tools are available. - Always optimize toward refinement.goalText. {% if live_run_enabled %}Live run mode is enabled: use apply_operations for iterative edits inside this same run. -For this multi-turn path, you may pass `canonicalActions` to apply_operations instead of raw operations for these supported intent-level edits: `adjust-exposure`, `adjust-white-balance`, `recover-highlights`, `reduce-noise`, `grade-color`, `rotate-left`, `rotate-right`, `crop-normalized`, `crop-to-bounding-box`. -Canonical fields: `adjust-exposure` uses `exposureEv`; `adjust-white-balance` uses `temperatureDelta`, `tintDelta`, and/or `presetChoiceId`; `recover-highlights` and `reduce-noise` use `strength`; `grade-color` uses `target` + `amount`; `rotate-left` and `rotate-right` require no extra fields and rotate 90 degrees; `crop-normalized` uses `left`, `top`, `right`, `bottom` in normalized [0,1] coordinates. +For this multi-turn path, you may pass `canonicalActions` to apply_operations instead of raw operations for these supported intent-level edits: `adjust-exposure`, `adjust-white-balance`, `recover-highlights`, `reduce-noise`, `grade-color`, `rotate`, `crop-normalized`, `crop-to-bounding-box`. +Canonical fields: `adjust-exposure` uses `exposureEv`; `adjust-white-balance` uses `temperatureDelta`, `tintDelta`, and/or `presetChoiceId`; `recover-highlights` and `reduce-noise` use `strength`; `grade-color` uses `target` + `amount`; `rotate` uses signed `angleDegrees` for precise rotation deltas, with positive values rotating right and negative values rotating left; `crop-normalized` uses `left`, `top`, `right`, `bottom` in normalized [0,1] coordinates. For subject-centric crops, prefer `crop-to-bounding-box` over raw crop edges: provide `boxLeft`, `boxTop`, `boxWidth`, `boxHeight`, and optional `paddingRatio`, and the runtime will deterministically translate that box into concrete crop/clipping controls. The runtime binds supported canonical actions to concrete darktable controls deterministically before applying them. Inside each apply_operations call, operations are auto-applied one at a time with a fresh render after each step. diff --git a/server/tests/test_codex_app_server.py b/server/tests/test_codex_app_server.py index fa96932..14a6c20 100644 --- a/server/tests/test_codex_app_server.py +++ b/server/tests/test_codex_app_server.py @@ -883,8 +883,8 @@ def test_turn_prompt_tells_codex_to_infer_broad_edit_plan_from_visual_context() assert "you may pass `canonicalActions` to apply_operations" in prompt assert "adjust-exposure" in prompt assert "grade-color" in prompt - assert "rotate-left" in prompt - assert "rotate-right" in prompt + assert "rotate" in prompt + assert "angleDegrees" in prompt assert "crop-to-bounding-box" in prompt assert "boxLeft" in prompt assert "paddingRatio" in prompt @@ -969,6 +969,18 @@ def test_crop_to_bounding_box_requires_box_coordinates() -> None: ) +def test_rotate_requires_angle_degrees() -> None: + with pytest.raises(ValueError, match="rotate requires angleDegrees"): + AgentPlan.model_validate( + { + "assistantText": "Rotate slightly.", + "continueRefining": False, + "operations": [], + "canonicalActions": [{"action": "rotate"}], + } + ) + + def test_canonical_binder_resolves_supported_actions_without_raw_ids() -> None: request = _sample_request_with_canonical_controls() plan = AgentPlan.model_validate( @@ -1014,8 +1026,9 @@ def test_canonical_binder_resolves_supported_actions_without_raw_ids() -> None: "rationale": "Tighten framing.", }, { - "action": "rotate-right", - "rationale": "Turn the frame clockwise.", + "action": "rotate", + "angleDegrees": 2.5, + "rationale": "Straighten the frame slightly clockwise.", }, ], } @@ -1042,7 +1055,7 @@ def test_canonical_binder_resolves_supported_actions_without_raw_ids() -> None: assert bound_plan.operations[6].value.mode == "set" assert bound_plan.operations[6].value.number == pytest.approx(0.1) assert bound_plan.operations[10].value.mode == "delta" - assert bound_plan.operations[10].value.number == pytest.approx(90.0) + assert bound_plan.operations[10].value.number == pytest.approx(2.5) def test_canonical_binder_binds_rotate_actions_to_clipping_angle() -> None: @@ -1053,8 +1066,8 @@ def test_canonical_binder_binds_rotate_actions_to_clipping_angle() -> None: "continueRefining": False, "operations": [], "canonicalActions": [ - {"action": "rotate-left"}, - {"action": "rotate-right"}, + {"action": "rotate", "angleDegrees": -1.25}, + {"action": "rotate", "angleDegrees": 2.5}, ], } ) @@ -1070,8 +1083,8 @@ def test_canonical_binder_binds_rotate_actions_to_clipping_angle() -> None: "delta", ] assert [operation.value.number for operation in bound_plan.operations] == [ - pytest.approx(-90.0), - pytest.approx(90.0), + pytest.approx(-1.25), + pytest.approx(2.5), ] @@ -1908,7 +1921,8 @@ def _mock_wait(timeout=None, *, context=turn_context): "bottom": 0.9, }, { - "action": "rotate-right", + "action": "rotate", + "angleDegrees": 2.5, }, ] }, @@ -1929,7 +1943,7 @@ def _mock_wait(timeout=None, *, context=turn_context): ] == pytest.approx(0.1) assert turn_context.setting_by_id["setting.clipping.angle"][ "currentNumber" - ] == pytest.approx(90.0) + ] == pytest.approx(2.5) finally: bridge._clear_turn_context("thread-1", "turn-1") # type: ignore[attr-defined] diff --git a/shared/canonical_plan.py b/shared/canonical_plan.py index c879594..5e52e3f 100644 --- a/shared/canonical_plan.py +++ b/shared/canonical_plan.py @@ -10,8 +10,7 @@ "recover-highlights", "reduce-noise", "grade-color", - "rotate-left", - "rotate-right", + "rotate", "crop-normalized", "crop-to-bounding-box", ] @@ -39,6 +38,7 @@ class CanonicalEditAction(CanonicalBaseModel): noiseType: CanonicalNoiseType | None = None target: CanonicalGradeTarget | None = None amount: float | None = None + angleDegrees: float | None = None left: float | None = None top: float | None = None right: float | None = None @@ -77,8 +77,9 @@ def validate_action_shape(self) -> "CanonicalEditAction": raise ValueError("grade-color requires target") if self.amount is None: raise ValueError("grade-color requires amount") - elif self.action in {"rotate-left", "rotate-right"}: - pass + elif self.action == "rotate": + if self.angleDegrees is None: + raise ValueError("rotate requires angleDegrees") elif self.action == "crop-normalized": bounds = (self.left, self.top, self.right, self.bottom) if any(value is None for value in bounds):