From 4ade6e53320db55628b5479d919ba7d82d29e871 Mon Sep 17 00:00:00 2001 From: Marco Russo Date: Thu, 14 May 2026 15:19:14 +0200 Subject: [PATCH 1/4] =?UTF-8?q?bump:=20version=200.6.0=20=E2=86=92=200.7.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 131 +++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9493e7c7..9a1b3d9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,134 @@ +## v0.7.0 (2026-05-14) + +### ✨ Features + +- metrics results saving in json +- judge metrics visualization on local dashboard, strictness is now 1-avg(ASR) +- **evaluator**: metrics added on local dashboard +- general bug fixing and improvement for all the attacks and the local dashboard +- Local dashboard now works both in remote and in local mode. +- Adding local dashboard features +- Automatic Ollama setup with 'hackagent examples ollama' +- Added Ollama demo +- Added CipherCheat attack +- Added CipherCheat attack +- Added CipherChat attack +- Added PAP attack +- Updated attack list in TUI +- H4RM3L attack added + +### 🐛🚑️ Fixes + +- **evaluator**: reformatted file +- **evaluator**: safely handle non-dict rows and update orchestrator test +- fixed bugs on all the attacks, local dashboard improved, retry mechanism implemented in openai requests +- Fixed documentation +- Fixed documentation +- Bug fixing for PAIR and baseline +- Fixed remote fetching for local dashboard +- Fixed TAP test +- Fixed TAP test +- Unit tests fixed +- Fixed API key init error +- Allow for empty API key +- Added CipherChat attack to TUI +- Fixed tests that made pytest loop +- Fixed result ordering, date and fetching. Added "Attack" column with the type of the attack +- Fixed result ordering, date and fetching. 
Added "Attack" column with the type of the attack +- Fixed startup error for local web app +- **docs**: fixing tests +- **docs**: can we please fix the docs +- **docs**: compilation of documentation +- **docs**: fixing docs error +- **docs**: building docs + +### ♻️ Refactorings + +- **standardize-attack-config**: standardization for each attack configuration + +### bump + +- **deps**: bump datasets from 4.8.3 to 4.8.4 +- **deps-dev**: bump ruff from 0.15.7 to 0.15.8 +- **deps**: bump litellm from 1.82.6 to 1.83.0 +- **deps**: bump textual from 8.1.1 to 8.2.1 +- **deps-dev**: bump pytest-cov from 7.0.0 to 7.1.0 +- **deps-dev**: bump anyio from 4.12.1 to 4.13.0 +- **deps-dev**: bump google-adk from 1.27.1 to 1.27.3 +- **deps**: bump litellm from 1.82.4 to 1.82.6 +- **deps**: bump nicegui from 3.8.0 to 3.9.0 +- **deps-dev**: bump ruff from 0.15.6 to 0.15.7 +- **deps**: bump datasets from 4.8.2 to 4.8.3 +- **deps**: bump openai from 2.28.0 to 2.29.0 +- **deps-dev**: bump google-adk from 1.27.0 to 1.27.1 +- **deps**: bump datasets from 4.7.0 to 4.8.2 +- **deps**: bump litellm from 1.82.1 to 1.82.3 +- **deps**: bump pypdf from 6.7.5 to 6.9.1 + +### ci + +- split tests into focused jobs and merge coverage +- scope test-matrix and test-quick to tests/unit/ only + +### fix + +- **ci**: use find instead of glob to locate .coverage files +- **ci**: include hidden .coverage files in upload artifacts +- **ci**: correct coverage artifact glob path +- use isinstance(next_page, (str, AnyUrl)) to avoid infinite pagination loop +- correct AnyUrl pagination check in RemoteBackend list methods +- **tests**: update test_update_result_status_function to use backend kwarg +- use backend.update_result() in baseline legacy evaluation sync path +- **e2e**: skip auth test when HACKAGENT_API_BASE_URL not explicitly set +- update attack techniques to use _backend config key and Tracker(backend=...) 
+- **tests**: pass backend=RemoteBackend(client) to AgentRouter in integration tests +- **remote**: use .next instead of .next_ on PaginatedAgentList +- **docs**: set markdown format:detect so .md files skip MDX parsing +- **docs**: use HTML entities instead of backslash escapes for MDX v3 compatibility +- **ci**: ruff format, F821 undefined name, F841 unused variable + +### refactor + +- standardize attack config naming + +### style + +- ruff format remote.py +- remove unused patch import from test_evaluation_updates +- fix ruff formatting in bon/generation.py and pap/generation.py +- **tests**: apply ruff formatting to integration adapter tests + +### ✅🤡🧪 Tests + +- Fixed test_tap.py +- **docs**: fixing docs tests + +### 🎨🏗️ Style & Architecture + +- reformatting +- formatting +- Fixed tests and linting +- **local-api**: local version of the storage that does not require api connection +- **local-api**: local version of the storage that does not require api connection + +### 💚👷 CI & Build + +- Fixed integration tests + +### 📝💡 Documentation + +- **build**: fixing build error + +### 🔥⚰️ Clean up + +- Removed e2e PAIR test +- Removed unnecessary tests +- Removed original codebase of PAP + +### 🫥 fixup + +- Fixed merge + ## v0.6.0 (2026-03-14) ### ✨ Features diff --git a/pyproject.toml b/pyproject.toml index f6b5f933..6836ba1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "hackagent" -version = "0.6.0" +version = "0.7.0" description = "HackAgent is an open-source security toolkit to detect vulnerabilities of your AI Agents." 
authors = [ {name = "AI Security Lab", email = "ais@ai4i.it"} From f091fe2f8f87926f6d64ca0a83948a8be4e5abe8 Mon Sep 17 00:00:00 2001 From: Marco Russo Date: Thu, 14 May 2026 15:23:45 +0200 Subject: [PATCH 2/4] =?UTF-8?q?bump:=20version=200.7.0=20=E2=86=92=200.8.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a1b3d9d..c519c2d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,61 @@ +## v0.8.0 (2026-05-14) + +### ✨ Features + +- Improved attacks, updated documentation and dashboard +- add attack configuration flow to TUI +- add attack configuration flow to TUI + +### 🐛🚑️ Fixes + +- correct api configuration for all roles in all attacks in tui + +### build + +- **deps**: bump authlib from 1.6.6 to 1.6.9 +- **deps**: bump authlib from 1.6.6 to 1.6.9 + +### bump + +- **deps-dev**: bump transformers from 4.57.6 to 5.5.4 +- **deps**: bump litellm from 1.83.0 to 1.83.10 +- **deps**: bump textual from 8.2.1 to 8.2.4 +- **deps-dev**: bump pytest from 9.0.2 to 9.0.3 +- **deps-dev**: bump google-adk from 1.28.0 to 1.31.0 +- **deps**: bump rich from 14.3.3 to 15.0.0 +- **deps-dev**: bump commitizen from 4.13.9 to 4.13.10 +- **deps**: bump click from 8.3.1 to 8.3.2 +- **deps-dev**: bump mcp from 1.26.0 to 1.27.0 +- **deps-dev**: bump requests from 2.32.5 to 2.33.1 +- **deps-dev**: bump ruff from 0.15.8 to 0.15.9 +- **deps**: bump openai from 2.29.0 to 2.30.0 +- **deps-dev**: bump google-adk from 1.27.3 to 1.28.0 + +### feat + +- propagate adapter/execution errors in AutoDAN-Turbo results +- propagate adapter/execution errors in TAP attack results +- propagate adapter/execution errors in PAP attack results +- propagate adapter/execution errors in PAIR attack results + +### fix + +- propagate adapter/execution 
errors to dashboard instead of masking as failed attacks +- **advprefix**: propagate errors to results instead of marking as mitigated Error rows (e.g. timeouts) were silently lost through the evaluation pipeline and finalized as FAILED_JAILBREAK ("Mitigated") instead of ERROR_AGENT_RESPONSE. Root causes fixed: - completions.py: propagate the normalized 'error' key so _detect_error_indices can identify error rows downstream - evaluation.py: detect/mark error rows before judge evaluation; preserve error rows through NLL filtering, aggregation, and selection so they reach finalize_all_goals with is_error=True - sync.py: skip is_error rows in sync_evaluation_to_server so the coordinator's ERROR_AGENT_RESPONSE is not overwritten by FAILED_JAILBREAK +- propagate BoN adapter errors as ERROR_AGENT_RESPONSE in dashboard +- propagate adapter/execution errors instead of masking them as failed attacks +- prevent orchestrator re-evaluation from zeroing jailbreak counts + +### refactor + +- unify dashboard labels, colors, and error reporting + +### 📝💡 Documentation + +- fixed documentation +- fixed documentation +- documentation update + ## v0.7.0 (2026-05-14) ### ✨ Features diff --git a/pyproject.toml b/pyproject.toml index 6836ba1e..bdbee1db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "hackagent" -version = "0.7.0" +version = "0.8.0" description = "HackAgent is an open-source security toolkit to detect vulnerabilities of your AI Agents." authors = [ {name = "AI Security Lab", email = "ais@ai4i.it"} diff --git a/uv.lock b/uv.lock index 1ba8e5cd..93f73734 100644 --- a/uv.lock +++ b/uv.lock @@ -2368,7 +2368,7 @@ wheels = [ [[package]] name = "hackagent" -version = "0.6.0" +version = "0.7.0" source = { editable = "." 
} dependencies = [ { name = "click" }, From 01c551b9aec25f207395ce58b1694e653277ab7a Mon Sep 17 00:00:00 2001 From: Marco Russo Date: Thu, 14 May 2026 15:26:06 +0200 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=94=96=20bump:=20new=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 93f73734..6d55e9a3 100644 --- a/uv.lock +++ b/uv.lock @@ -2368,7 +2368,7 @@ wheels = [ [[package]] name = "hackagent" -version = "0.7.0" +version = "0.8.0" source = { editable = "." } dependencies = [ { name = "click" }, From 209952a94f20991d16a6db7af1e6f0963c76f31c Mon Sep 17 00:00:00 2001 From: Marco Russo Date: Fri, 15 May 2026 09:46:03 +0200 Subject: [PATCH 4/4] =?UTF-8?q?bump:=20version=200.8.0=20=E2=86=92=200.9.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c519c2d3..4e133ca5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +## v0.9.0 (2026-05-15) + ## v0.8.0 (2026-05-14) ### ✨ Features diff --git a/pyproject.toml b/pyproject.toml index bdbee1db..820bd5a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "hackagent" -version = "0.8.0" +version = "0.9.0" description = "HackAgent is an open-source security toolkit to detect vulnerabilities of your AI Agents." authors = [ {name = "AI Security Lab", email = "ais@ai4i.it"}