From c44c4cac39baa64ef86cb4c6ace92099a4cd79a4 Mon Sep 17 00:00:00 2001
From: Nicola Franco <n.francovia@gmail.com>
Date: Fri, 22 May 2026 20:01:59 +0200
Subject: [PATCH 1/6] fix: resolve top-level 'judge' dict before falling back
 to gpt-4-0613 default

When the orchestrator runs the post-attack evaluation pipeline, it calls
_resolve_judges_from_config() with no arguments on the attack_config.
The method correctly checked for a 'judges' list but skipped the common
'judge' dict format used by Ollama/local examples, falling through to
the hardcoded 'gpt-4-0613' default and crashing with a missing-credentials
error for users without an OpenAI API key.

Resolution order is now:
  1. 'judges' list in raw config
  2. 'judge' dict in raw config (wrapped in a list)
  3. technique_params fallback
  4. gpt-4-0613 / jailbreakbench hardcoded defaults

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 hackagent/attacks/evaluator/evaluation_step.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/hackagent/attacks/evaluator/evaluation_step.py b/hackagent/attacks/evaluator/evaluation_step.py
index ef68c080..a355f5b7 100644
--- a/hackagent/attacks/evaluator/evaluation_step.py
+++ b/hackagent/attacks/evaluator/evaluation_step.py
@@ -355,8 +355,11 @@ def _resolve_judges_from_config(
         """
         Resolve the judges list from ``_raw_config``.
 
-        If no top-level ``judges`` key is present, builds a single-judge
-        fallback from *technique_params* for backward compatibility.
+        Resolution order:
+        1. Top-level ``judges`` list in raw config.
+        2. Top-level ``judge`` dict in raw config (wrapped in a list).
+        3. ``technique_params["judge"]`` string (legacy fallback).
+        4. ``default_judge`` / ``default_type`` hardcoded defaults.
 
         Args:
             technique_params: Technique-specific params dict with legacy
@@ -371,6 +374,11 @@ def _resolve_judges_from_config(
         if isinstance(judges, list) and judges:
             return judges
 
+        # Use the top-level "judge" dict if present (e.g. from Ollama/local configs).
+        raw_judge = self._raw_config.get("judge")
+        if isinstance(raw_judge, dict) and raw_judge:
+            return [raw_judge]
+
         tp = technique_params or {}
         judge_model = tp.get("judge", default_judge)
         judge_type = tp.get("judge_type") or self.infer_judge_type(

From 35656c32986c32e84fd4f155d13842d17a1a8d90 Mon Sep 17 00:00:00 2001
From: Nicola Franco <n.francovia@gmail.com>
Date: Fri, 22 May 2026 20:23:27 +0200
Subject: [PATCH 2/6] fix: replace hardcoded OpenAI model defaults with local
 Ollama defaults

Remove all gpt-4/gpt-4o-mini hardcoded defaults from attacks, techniques,
and judge resolution so the tool works without any external API key.

Changes:
- evaluation_step._resolve_judges_from_config: default_judge now uses
  DEFAULT_JUDGE_IDENTIFIER (gemma3:4b via Ollama) with default_type
  'harmbench' instead of 'gpt-4-0613'/'jailbreakbench'. Also injects
  the Ollama endpoint/agent_type when the built-in default is used.
- flipattack/attack.py: goal metadata judge default changed from
  'gpt-4-0613' to DEFAULT_JUDGE_IDENTIFIER.
- cli/tui/attack_specs.py: PAIR attacker default changed from 'gpt-4'
  and PAP attacker default changed from 'gpt-4o-mini' to
  DEFAULT_ATTACKER_IDENTIFIER (gemma3:4b).
---
 hackagent/attacks/evaluator/evaluation_step.py    | 14 ++++++++++++--
 hackagent/attacks/techniques/flipattack/attack.py |  3 ++-
 hackagent/cli/tui/attack_specs.py                 |  9 +++++++--
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/hackagent/attacks/evaluator/evaluation_step.py b/hackagent/attacks/evaluator/evaluation_step.py
index a355f5b7..a05daada 100644
--- a/hackagent/attacks/evaluator/evaluation_step.py
+++ b/hackagent/attacks/evaluator/evaluation_step.py
@@ -51,6 +51,11 @@ def execute(self, input_data):
 from hackagent.attacks.shared.router_factory import extract_passthrough_request_config
 from hackagent.attacks.evaluator.sync import sync_evaluation_to_server
 from hackagent.attacks.techniques.advprefix.config import EvaluatorConfig
+from hackagent.attacks.techniques.config import (
+    DEFAULT_JUDGE_IDENTIFIER,
+    DEFAULT_LOCAL_AGENT_TYPE,
+    DEFAULT_LOCAL_MODEL_ENDPOINT,
+)
 from hackagent.server.client import AuthenticatedClient
 from hackagent.router.types import AgentTypeEnum
 
@@ -349,8 +354,8 @@ def _build_base_eval_config(
     def _resolve_judges_from_config(
         self,
         technique_params: Optional[Dict[str, Any]] = None,
-        default_judge: str = "gpt-4-0613",
-        default_type: str = "jailbreakbench",
+        default_judge: str = DEFAULT_JUDGE_IDENTIFIER,
+        default_type: str = "harmbench",
     ) -> List[Dict[str, Any]]:
         """
         Resolve the judges list from ``_raw_config``.
@@ -388,6 +393,11 @@ def _resolve_judges_from_config(
             "identifier": judge_model,
             "type": judge_type,
         }
+        # For the built-in local default, inject Ollama connectivity so it
+        # works out-of-the-box without any API key.
+        if judge_model == DEFAULT_JUDGE_IDENTIFIER:
+            fallback.setdefault("endpoint", DEFAULT_LOCAL_MODEL_ENDPOINT)
+            fallback.setdefault("agent_type", DEFAULT_LOCAL_AGENT_TYPE)
         for key in (
             "endpoint",
             "agent_type",
diff --git a/hackagent/attacks/techniques/flipattack/attack.py b/hackagent/attacks/techniques/flipattack/attack.py
index 8b41db5a..1dcba302 100644
--- a/hackagent/attacks/techniques/flipattack/attack.py
+++ b/hackagent/attacks/techniques/flipattack/attack.py
@@ -41,6 +41,7 @@
 from hackagent.router.router import AgentRouter
 from hackagent.attacks.techniques.base import BaseAttack
 from hackagent.attacks.shared.tui import with_tui_logging
+from hackagent.attacks.techniques.config import DEFAULT_JUDGE_IDENTIFIER
 
 from . import generation, evaluation
 from .config import DEFAULT_FLIPATTACK_CONFIG
@@ -461,7 +462,7 @@ def run(self, goals: List[str]) -> List[Dict]:
             "cot": flipattack_params.get("cot", False),
             "lang_gpt": flipattack_params.get("lang_gpt", False),
             "few_shot": flipattack_params.get("few_shot", False),
-            "judge": flipattack_params.get("judge", "gpt-4-0613"),
+            "judge": flipattack_params.get("judge", DEFAULT_JUDGE_IDENTIFIER),
         }
 
         # Initialize goal contexts upfront so goal elapsed_s covers the full
diff --git a/hackagent/cli/tui/attack_specs.py b/hackagent/cli/tui/attack_specs.py
index 772df0bc..80fadd26 100644
--- a/hackagent/cli/tui/attack_specs.py
+++ b/hackagent/cli/tui/attack_specs.py
@@ -26,6 +26,11 @@
 from enum import Enum
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
+from hackagent.attacks.techniques.config import (
+    DEFAULT_ATTACKER_IDENTIFIER,
+    DEFAULT_JUDGE_IDENTIFIER,
+)
+
 
 # =====================================================================
 # Field / Spec primitives
@@ -578,7 +583,7 @@ def get_all_attack_specs() -> Dict[str, AttackConfigSpec]:
                 key="attacker.model",
                 label="Attacker Model",
                 field_type=FieldType.STRING,
-                default="gpt-4",
+                default=DEFAULT_ATTACKER_IDENTIFIER,
                 description="Model ID for the attacker LLM that generates prompts.",
                 section="Attacker LLM",
             ),
@@ -1305,7 +1310,7 @@ def get_all_attack_specs() -> Dict[str, AttackConfigSpec]:
                 key="attacker.identifier",
                 label="Attacker Model",
                 field_type=FieldType.STRING,
-                default="gpt-4o-mini",
+                default=DEFAULT_ATTACKER_IDENTIFIER,
                 description="Model identifier for persuasive paraphrasing.",
                 section="Attacker LLM",
             ),

From 2da8f66e7d9416cfa8816e839eee066d454a166f Mon Sep 17 00:00:00 2001
From: Nicola Franco <n.francovia@gmail.com>
Date: Fri, 22 May 2026 20:29:04 +0200
Subject: [PATCH 3/6] =?UTF-8?q?bump:=20version=200.10.0=20=E2=86=92=200.10?=
 =?UTF-8?q?.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md   | 8 ++++++++
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6b7a995b..1420e940 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## v0.10.1 (2026-05-22)
+
+### fix
+
+- replace hardcoded OpenAI model defaults with local Ollama defaults
+- resolve top-level 'judge' dict before falling back to gpt-4-0613 default
+- move examples/ inside hackagent package for correct wheel packaging
+
 ## v0.10.0 (2026-05-22)
 
 ### ✨ Features
diff --git a/pyproject.toml b/pyproject.toml
index 2fef50df..47a1d729 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "hackagent"
-version = "0.10.0"
+version = "0.10.1"
 description = "HackAgent is an open-source security toolkit to detect vulnerabilities of your AI Agents."
 authors = [
     {name = "AI Security Lab", email = "ais@ai4i.it"}
diff --git a/uv.lock b/uv.lock
index b4c831c5..a6ce3655 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2368,7 +2368,7 @@ wheels = [
 
 [[package]]
 name = "hackagent"
-version = "0.9.1"
+version = "0.10.1"
 source = { editable = "." }
 dependencies = [
     { name = "click" },

From bc911ea7334bd8076dcdee76e1915fd6c66ecef6 Mon Sep 17 00:00:00 2001
From: Nicola Franco <n.francovia@gmail.com>
Date: Fri, 22 May 2026 20:40:43 +0200
Subject: [PATCH 4/6] chore(ci): add dependabot auto-merge workflow

Enables auto-merge (squash) for all dependabot PRs targeting main.
GitHub will merge automatically once all required CI checks pass.
---
 .github/workflows/dependabot-automerge.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 .github/workflows/dependabot-automerge.yml

diff --git a/.github/workflows/dependabot-automerge.yml b/.github/workflows/dependabot-automerge.yml
new file mode 100644
index 00000000..d7529fa3
--- /dev/null
+++ b/.github/workflows/dependabot-automerge.yml
@@ -0,0 +1,21 @@
+name: Dependabot Auto-merge
+
+on:
+  pull_request:
+    branches: ["main"]
+
+permissions:
+  contents: write  # needed so the merge can be performed with GITHUB_TOKEN
+  pull-requests: write  # needed to enable auto-merge on the pull request
+
+jobs:
+  auto-merge:
+    name: Auto-merge Dependabot PR
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.user.login == 'dependabot[bot]'  # gate on the PR author, not on whoever triggered this run
+    steps:
+      - name: Enable auto-merge
+        run: gh pr merge --auto --squash "$PR_URL"
+        env:
+          PR_URL: ${{ github.event.pull_request.html_url }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From 033e4e09982c8785da8567f3dd6e0837d878d2ac Mon Sep 17 00:00:00 2001
From: Nicola Franco <n.francovia@gmail.com>
Date: Fri, 22 May 2026 20:44:23 +0200
Subject: [PATCH 5/6] fix(ci): remove unused DEFAULT_JUDGE_IDENTIFIER import
 from attack_specs

---
 hackagent/cli/tui/attack_specs.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/hackagent/cli/tui/attack_specs.py b/hackagent/cli/tui/attack_specs.py
index 80fadd26..9e07ea2e 100644
--- a/hackagent/cli/tui/attack_specs.py
+++ b/hackagent/cli/tui/attack_specs.py
@@ -26,10 +26,7 @@
 from enum import Enum
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
-from hackagent.attacks.techniques.config import (
-    DEFAULT_ATTACKER_IDENTIFIER,
-    DEFAULT_JUDGE_IDENTIFIER,
-)
+from hackagent.attacks.techniques.config import DEFAULT_ATTACKER_IDENTIFIER
 
 
 # =====================================================================

From be51dfb4d6a92a447345f4d6db1958984f17ae6b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 22 May 2026 19:00:26 +0000
Subject: [PATCH 6/6] Fix stale default judge expectations in evaluation-step
 integration test

---
 tests/integration/attacks/test_evaluation_step.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/integration/attacks/test_evaluation_step.py b/tests/integration/attacks/test_evaluation_step.py
index aec5cfd6..52d279f4 100644
--- a/tests/integration/attacks/test_evaluation_step.py
+++ b/tests/integration/attacks/test_evaluation_step.py
@@ -41,6 +41,7 @@
     JUDGE_TYPE_LABELS,
     MERGE_KEYS,
 )
+from hackagent.attacks.techniques.config import DEFAULT_JUDGE_IDENTIFIER
 from hackagent.router.types import AgentTypeEnum
 
 logger = logging.getLogger(__name__)
@@ -297,9 +298,9 @@ def test_fallback_to_defaults_with_no_params(self):
         judges = step._resolve_judges_from_config()
 
         assert len(judges) == 1
-        assert judges[0]["identifier"] == "gpt-4-0613"
-        # default_type in _resolve_judges_from_config is "jailbreakbench"
-        assert judges[0]["type"] == "jailbreakbench"
+        assert judges[0]["identifier"] == DEFAULT_JUDGE_IDENTIFIER
+        # default_type in _resolve_judges_from_config is "harmbench"
+        assert judges[0]["type"] == "harmbench"
 
     def test_multiple_judges(self):
         """Test with multiple judges configured."""