diff --git a/Flames/H-2026-001.md b/Flames/H200.md similarity index 99% rename from Flames/H-2026-001.md rename to Flames/H200.md index 0db71118..fe169d4b 100644 --- a/Flames/H-2026-001.md +++ b/Flames/H200.md @@ -1,4 +1,4 @@ -# H-2026-001 +# H200 Threat actors are using DLL side-loading to force legitimate signed VirtualBox executables (DbgView.exe) to load malicious DLLs (vboxrt.dll) that download additional malicious code from attacker-controlled servers to evade security monitoring and achieve privileged code execution. diff --git a/Flames/H-2026-002.md b/Flames/H201.md similarity index 99% rename from Flames/H-2026-002.md rename to Flames/H201.md index 72d14c5f..338be1b3 100644 --- a/Flames/H-2026-002.md +++ b/Flames/H201.md @@ -1,4 +1,4 @@ -# H-2026-002 +# H201 Adversaries are creating domain-level content compliance rules with regular expression patterns matching strategic intelligence keywords to silently BCC-forward emails to external attacker-controlled Gmail accounts for covert data exfiltration. diff --git a/Flames/H-2026-003.md b/Flames/H202.md similarity index 99% rename from Flames/H-2026-003.md rename to Flames/H202.md index 2ef169e9..cb692119 100644 --- a/Flames/H-2026-003.md +++ b/Flames/H202.md @@ -1,4 +1,4 @@ -# H-2026-003 +# H202 Threat actors are exfiltrating API credentials stored in JetBrains IDE plugin settings by transmitting them unencrypted over HTTP to a hardcoded command and control server at 39.107.60[.]51 immediately after users click "Apply" in the plugin configuration interface. diff --git a/Flames/H-2026-004.md b/Flames/H203.md similarity index 99% rename from Flames/H-2026-004.md rename to Flames/H203.md index a3fed44f..1a678341 100644 --- a/Flames/H-2026-004.md +++ b/Flames/H203.md @@ -1,4 +1,4 @@ -# H-2026-004 +# H203 Adversaries are sending crafted requests to bypass peering authentication mechanisms on Cisco Catalyst SD-WAN Controllers, Managers, and Validators to establish unauthorized control connections with state "up" and zero challenge-ack values in connection statistics, enabling administrative access to NETCONF for SD-WAN fabric manipulation. diff --git a/hunts-data.js b/hunts-data.js index ff4bbfda..dc28c22b 100644 --- a/hunts-data.js +++ b/hunts-data.js @@ -748,113 +748,6 @@ const HUNTS_DATA = [ "references": "- [MITRE ATT&CK T1543.003 — Create or Modify System Process: Windows Service](https://attack.mitre.org/techniques/T1543/003/)\n- [source report — ESET: Killing me gently — Inside Gentlemen's EDR killer framework](https://www.welivesecurity.com/en/eset-research/killing-me-gently-inside-gentlemens-edr-killer-framework/)\n- [LOLDrivers.io — Living Off The Land Drivers (allowlist/blocklist source: hashes, signers, Sysmon config)](https://www.loldrivers.io/)\n- [LOLDrivers — Sysmon config of vulnerable hashes (baseline enrichment)](https://github.com/magicsword-io/LOLDrivers/blob/main/detections/sysmon/sysmon_config_vulnerable_hashes.xml)\n- [TrustedSec — Sysmon Community Guide: Driver Loading (EID 6 fields, CheckRevocation)](https://github.com/trustedsec/SysmonCommunityGuide/blob/master/chapters/driver-loading.md)\n- [Splunk — These Are The Drivers You Are Looking For: Detect and Prevent Malicious Drivers](https://www.splunk.com/en_us/blog/security/these-are-the-drivers-you-are-looking-for-detect-and-prevent-malicious-drivers.html)", "file_path": "Embers/B026.md" }, - { - "id": "H-2026-001", - "category": "Flames", - "title": "Threat actors are using DLL side-loading to force legitimate signed VirtualBox executables (DbgView.exe) to load malicious DLLs (vboxrt.dll) that download additional malicious code from attacker-controlled servers to evade security monitoring and achieve privileged code execution.", - "tactic": "Defense Evasion", - "notes": "Based on ATT&CK technique T1574.002. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "defense_evasion", - "dll_sideloading", - "virtualbox", - "dragonforce", - "T1574.002" - ], - "techniques": [ - "T1574.002" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "Badger", - "link": "" - }, - "why": "- DLL side-loading allows attackers to bypass application control and security monitoring by leveraging the trust placed in legitimate, signed executables like VirtualBox/DbgView, making malicious activity appear benign\n- This technique provides attackers with privileged code execution context inherited from the trusted VirtualBox process, enabling them to perform reconnaissance, download additional tools, and establish persistence while evading detection\n- The DragonForce ransomware group (Hackledorb) weaponized this technique as part of a sophisticated multi-month intrusion that ultimately led to data exfiltration and encryption, demonstrating how DLL side-loading serves as a critical enabler for advanced persistent threats\n- Detecting this behavior is particularly important because it represents an early-stage defense evasion technique that, if caught, can prevent the deployment of more sophisticated tools like Backdoor.Turn and the eventual ransomware payload", - "references": "- [MITRE ATT&CK T1574.002 - Hijack Execution Flow: DLL Side-Loading](https://attack.mitre.org/techniques/T1574/002/)\n- [Source CTI Report - Hidden in Teams: DragonForce Attackers Weaponize Microsoft Teams Relays to Stay Hidden](https://symantec-enterprise-blogs.security.com/threat-intelligence/dragonforce-msteams-backdoor)", - "file_path": "Flames/H-2026-001.md" - }, - { - "id": "H-2026-002", - "category": "Flames", - "title": "Adversaries are creating domain-level content compliance rules with regular expression patterns matching strategic intelligence keywords to silently BCC-forward emails to external attacker-controlled Gmail accounts for covert data exfiltration.", - "tactic": "Collection", - "notes": "Based on ATT&CK technique T1114.003. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "collection", - "email", - "china", - "unc6508", - "T1114.003" - ], - "techniques": [ - "T1114.003" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "T3chn3", - "link": "" - }, - "why": "- This technique represents a novel and highly effective method for persistent, automated data exfiltration that operates silently without user awareness or typical email forwarding indicators\n- Content compliance rules are legitimate administrative features that bypass traditional email security controls and DLP solutions, making detection significantly more challenging than standard email forwarding rules\n- UNC6508 successfully used this technique to exfiltrate strategic intelligence on defense operations, AI research, military health, and Indo-Pacific command for over a year undetected, demonstrating its effectiveness for long-term espionage campaigns\n- The technique provides continuous, real-time access to sensitive communications matching specific intelligence requirements without requiring repeated interactive access to compromised accounts\n- Detection of unauthorized compliance rules can reveal compromised administrator accounts and prevent ongoing data loss across entire organizational units rather than individual mailboxes", - "references": "- [MITRE ATT&CK T1114.003 - Email Collection: Email Forwarding Rule](https://attack.mitre.org/techniques/T1114/003/)\n- [Source CTI Report](https://cloud.google.com/blog/topics/threat-intelligence/prc-targets-us-medical-research)", - "file_path": "Flames/H-2026-002.md" - }, - { - "id": "H-2026-003", - "category": "Flames", - "title": "Threat actors are exfiltrating API credentials stored in JetBrains IDE plugin settings by transmitting them unencrypted over HTTP to a hardcoded command and control server at 39.107.60[.]51 immediately after users click \"Apply\" in the plugin configuration interface.", - "tactic": "Credential Access", - "notes": "Based on ATT&CK technique T1552.001. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "credential_access", - "jetbrains", - "api_keys", - "T1552.001" - ], - "techniques": [ - "T1552.001" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "Shilpa Merlin Joy", - "link": "https://github.com/shilpamerlin/" - }, - "why": "- This campaign demonstrates a novel supply chain attack vector targeting developer tools, with at least 15 malicious plugins accumulating nearly 70,000 installations across the JetBrains Marketplace\n- Stolen AI API keys provide attackers with immediate access to paid AI services, enabling both direct financial fraud and potential data exfiltration through AI model interactions\n- The unencrypted HTTP transmission to a hardcoded IP address creates a highly detectable network signature that can identify compromised developer workstations before credentials are monetized\n- Detection of this behavior protects high-value AI API credentials that could be resold or used to conduct further attacks, as evidenced by the campaign's suspected credential redistribution scheme to paid plugin users", - "references": "- [MITRE ATT&CK T1552.001 - Unsecured Credentials: Credentials In Files](https://attack.mitre.org/techniques/T1552/001/)\n- [Source CTI Report](https://www.bleepingcomputer.com/news/security/malicious-jetbrains-marketplace-plugins-steal-ai-api-keys-from-developers/)", - "file_path": "Flames/H-2026-003.md" - }, - { - "id": "H-2026-004", - "category": "Flames", - "title": "Adversaries are sending crafted requests to bypass peering authentication mechanisms on Cisco Catalyst SD-WAN Controllers, Managers, and Validators to establish unauthorized control connections with state \"up\" and zero challenge-ack values in connection statistics, enabling administrative access to NETCONF for SD-WAN fabric manipulation.", - "tactic": "Defense Evasion", - "notes": "Based on ATT&CK technique T1562.006. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "defense_evasion", - "sdwan", - "cisco", - "authentication_bypass", - "T1562.006" - ], - "techniques": [ - "T1562.006" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "young6x7", - "link": "" - }, - "why": "- This authentication bypass vulnerability (CVE-2026-20182) allows unauthenticated remote attackers to gain high-privileged administrative access to SD-WAN infrastructure, representing a critical security control failure\n- Successful exploitation enables attackers to manipulate network configurations across the entire SD-WAN fabric through NETCONF access, potentially affecting routing, security policies, and network segmentation\n- Cisco PSIRT confirmed limited active exploitation in May 2026, making this an immediate threat requiring detection capabilities beyond signature-based approaches\n- The specific indicator of \"state:up\" with \"challenge-ack 0\" in control connection statistics provides a concrete, observable artifact that distinguishes malicious authentication bypass from legitimate peering events\n- Internet-exposed SD-WAN controllers are at highest risk, and the vulnerability affects all deployment types including on-premises, cloud-managed, and government implementations", - "references": "- [MITRE ATT&CK T1562.006 - Impair Defenses: Indicator Blocking](https://attack.mitre.org/techniques/T1562/006/)\n- [Source CTI Report](https://sec.cloudapps.cisco.com/security/center/content/CiscoSecurityAdvisory/cisco-sa-sdwan-rpa2-v69WY2SW)", - "file_path": "Flames/H-2026-004.md" - }, { "id": "H001", "category": "Flames", @@ -6590,6 +6483,113 @@ const HUNTS_DATA = [ "references": "- [MITRE ATT&CK T1195.002 — Supply Chain Compromise: Compromise Software Supply Chain](https://attack.mitre.org/techniques/T1195/002/)\n- [MITRE ATT&CK T1059 — Command and Scripting Interpreter](https://attack.mitre.org/techniques/T1059/)\n- [source report — Microsoft: Mastra npm supply chain compromise (Sapphire Sleet)](https://www.microsoft.com/en-us/security/blog/2026/06/17/postinstall-payload-inside-mastra-npm-supply-chain-compromise/)\n- [Splunk — Defending Against npm Supply Chain Attacks: Detection, Emulation, and Analysis (SPL for npm lifecycle script abuse)](https://www.splunk.com/en_us/blog/security/npm-supply-chain-attack-detection-analysis.html)\n- [Undercode Testing — Hunt Down Malicious npm Packages: a KQL Masterclass (node/npm lineage + lifecycle hooks)](https://undercodetesting.com/unmask-the-invisible-hunt-down-malicious-npm-packages-with-this-kql-masterclass/)\n- [Microsoft — Mitigating the Axios npm supply chain compromise (postinstall dropper, %PROGRAMDATA% LOLBin staging)](https://www.microsoft.com/en-us/security/blog/2026/04/01/mitigating-the-axios-npm-supply-chain-compromise/)\n- [NVISO — The Axios npm supply chain incident: fake dependency, real backdoor (install-time behavioral detection)](https://blog.nviso.eu/2026/04/03/the-axios-npm-supply-chain-incident-fake-dependency-real-backdoor/)", "file_path": "Flames/H199.md" }, + { + "id": "H200", + "category": "Flames", + "title": "Threat actors are using DLL side-loading to force legitimate signed VirtualBox executables (DbgView.exe) to load malicious DLLs (vboxrt.dll) that download additional malicious code from attacker-controlled servers to evade security monitoring and achieve privileged code execution.", + "tactic": "Defense Evasion", + "notes": "Based on ATT&CK technique T1574.002. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "defense_evasion", + "dll_sideloading", + "virtualbox", + "dragonforce", + "T1574.002" + ], + "techniques": [ + "T1574.002" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "Badger", + "link": "" + }, + "why": "- DLL side-loading allows attackers to bypass application control and security monitoring by leveraging the trust placed in legitimate, signed executables like VirtualBox/DbgView, making malicious activity appear benign\n- This technique provides attackers with privileged code execution context inherited from the trusted VirtualBox process, enabling them to perform reconnaissance, download additional tools, and establish persistence while evading detection\n- The DragonForce ransomware group (Hackledorb) weaponized this technique as part of a sophisticated multi-month intrusion that ultimately led to data exfiltration and encryption, demonstrating how DLL side-loading serves as a critical enabler for advanced persistent threats\n- Detecting this behavior is particularly important because it represents an early-stage defense evasion technique that, if caught, can prevent the deployment of more sophisticated tools like Backdoor.Turn and the eventual ransomware payload", + "references": "- [MITRE ATT&CK T1574.002 - Hijack Execution Flow: DLL Side-Loading](https://attack.mitre.org/techniques/T1574/002/)\n- [Source CTI Report - Hidden in Teams: DragonForce Attackers Weaponize Microsoft Teams Relays to Stay Hidden](https://symantec-enterprise-blogs.security.com/threat-intelligence/dragonforce-msteams-backdoor)", + "file_path": "Flames/H200.md" + }, + { + "id": "H201", + "category": "Flames", + "title": "Adversaries are creating domain-level content compliance rules with regular expression patterns matching strategic intelligence keywords to silently BCC-forward emails to external attacker-controlled Gmail accounts for covert data exfiltration.", + "tactic": "Collection", + "notes": "Based on ATT&CK technique T1114.003. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "collection", + "email", + "china", + "unc6508", + "T1114.003" + ], + "techniques": [ + "T1114.003" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "T3chn3", + "link": "" + }, + "why": "- This technique represents a novel and highly effective method for persistent, automated data exfiltration that operates silently without user awareness or typical email forwarding indicators\n- Content compliance rules are legitimate administrative features that bypass traditional email security controls and DLP solutions, making detection significantly more challenging than standard email forwarding rules\n- UNC6508 successfully used this technique to exfiltrate strategic intelligence on defense operations, AI research, military health, and Indo-Pacific command for over a year undetected, demonstrating its effectiveness for long-term espionage campaigns\n- The technique provides continuous, real-time access to sensitive communications matching specific intelligence requirements without requiring repeated interactive access to compromised accounts\n- Detection of unauthorized compliance rules can reveal compromised administrator accounts and prevent ongoing data loss across entire organizational units rather than individual mailboxes", + "references": "- [MITRE ATT&CK T1114.003 - Email Collection: Email Forwarding Rule](https://attack.mitre.org/techniques/T1114/003/)\n- [Source CTI Report](https://cloud.google.com/blog/topics/threat-intelligence/prc-targets-us-medical-research)", + "file_path": "Flames/H201.md" + }, + { + "id": "H202", + "category": "Flames", + "title": "Threat actors are exfiltrating API credentials stored in JetBrains IDE plugin settings by transmitting them unencrypted over HTTP to a hardcoded command and control server at 39.107.60[.]51 immediately after users click \"Apply\" in the plugin configuration interface.", + "tactic": "Credential Access", + "notes": "Based on ATT&CK technique T1552.001. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "credential_access", + "jetbrains", + "api_keys", + "T1552.001" + ], + "techniques": [ + "T1552.001" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "Shilpa Merlin Joy", + "link": "https://github.com/shilpamerlin/" + }, + "why": "- This campaign demonstrates a novel supply chain attack vector targeting developer tools, with at least 15 malicious plugins accumulating nearly 70,000 installations across the JetBrains Marketplace\n- Stolen AI API keys provide attackers with immediate access to paid AI services, enabling both direct financial fraud and potential data exfiltration through AI model interactions\n- The unencrypted HTTP transmission to a hardcoded IP address creates a highly detectable network signature that can identify compromised developer workstations before credentials are monetized\n- Detection of this behavior protects high-value AI API credentials that could be resold or used to conduct further attacks, as evidenced by the campaign's suspected credential redistribution scheme to paid plugin users", + "references": "- [MITRE ATT&CK T1552.001 - Unsecured Credentials: Credentials In Files](https://attack.mitre.org/techniques/T1552/001/)\n- [Source CTI Report](https://www.bleepingcomputer.com/news/security/malicious-jetbrains-marketplace-plugins-steal-ai-api-keys-from-developers/)", + "file_path": "Flames/H202.md" + }, + { + "id": "H203", + "category": "Flames", + "title": "Adversaries are sending crafted requests to bypass peering authentication mechanisms on Cisco Catalyst SD-WAN Controllers, Managers, and Validators to establish unauthorized control connections with state \"up\" and zero challenge-ack values in connection statistics, enabling administrative access to NETCONF for SD-WAN fabric manipulation.", + "tactic": "Defense Evasion", + "notes": "Based on ATT&CK technique T1562.006. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "defense_evasion", + "sdwan", + "cisco", + "authentication_bypass", + "T1562.006" + ], + "techniques": [ + "T1562.006" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "young6x7", + "link": "" + }, + "why": "- This authentication bypass vulnerability (CVE-2026-20182) allows unauthenticated remote attackers to gain high-privileged administrative access to SD-WAN infrastructure, representing a critical security control failure\n- Successful exploitation enables attackers to manipulate network configurations across the entire SD-WAN fabric through NETCONF access, potentially affecting routing, security policies, and network segmentation\n- Cisco PSIRT confirmed limited active exploitation in May 2026, making this an immediate threat requiring detection capabilities beyond signature-based approaches\n- The specific indicator of \"state:up\" with \"challenge-ack 0\" in control connection statistics provides a concrete, observable artifact that distinguishes malicious authentication bypass from legitimate peering events\n- Internet-exposed SD-WAN controllers are at highest risk, and the vulnerability affects all deployment types including on-premises, cloud-managed, and government implementations", + "references": "- [MITRE ATT&CK T1562.006 - Impair Defenses: Indicator Blocking](https://attack.mitre.org/techniques/T1562/006/)\n- [Source CTI Report](https://sec.cloudapps.cisco.com/security/center/content/CiscoSecurityAdvisory/cisco-sa-sdwan-rpa2-v69WY2SW)", + "file_path": "Flames/H203.md" + }, { "id": "M001", "category": "Alchemy", diff --git a/public/actor-mentions.json b/public/actor-mentions.json index 14c2e56d..fc13b982 100644 --- a/public/actor-mentions.json +++ b/public/actor-mentions.json @@ -1,5 +1,5 @@ { - "generated_at": "2026-06-22T14:38:25Z", + "generated_at": "2026-06-25T00:22:49Z", "mentions": { "actor:G0007": [ "B005", diff --git a/public/hunts-data.json b/public/hunts-data.json index 1f114065..6877cceb 100644 --- a/public/hunts-data.json +++ b/public/hunts-data.json @@ -747,113 +747,6 @@ "references": "- [MITRE ATT&CK T1543.003 — Create or Modify System Process: Windows Service](https://attack.mitre.org/techniques/T1543/003/)\n- [source report — ESET: Killing me gently — Inside Gentlemen's EDR killer framework](https://www.welivesecurity.com/en/eset-research/killing-me-gently-inside-gentlemens-edr-killer-framework/)\n- [LOLDrivers.io — Living Off The Land Drivers (allowlist/blocklist source: hashes, signers, Sysmon config)](https://www.loldrivers.io/)\n- [LOLDrivers — Sysmon config of vulnerable hashes (baseline enrichment)](https://github.com/magicsword-io/LOLDrivers/blob/main/detections/sysmon/sysmon_config_vulnerable_hashes.xml)\n- [TrustedSec — Sysmon Community Guide: Driver Loading (EID 6 fields, CheckRevocation)](https://github.com/trustedsec/SysmonCommunityGuide/blob/master/chapters/driver-loading.md)\n- [Splunk — These Are The Drivers You Are Looking For: Detect and Prevent Malicious Drivers](https://www.splunk.com/en_us/blog/security/these-are-the-drivers-you-are-looking-for-detect-and-prevent-malicious-drivers.html)", "file_path": "Embers/B026.md" }, - { - "id": "H-2026-001", - "category": "Flames", - "title": "Threat actors are using DLL side-loading to force legitimate signed VirtualBox executables (DbgView.exe) to load malicious DLLs (vboxrt.dll) that download additional malicious code from attacker-controlled servers to evade security monitoring and achieve privileged code execution.", - "tactic": "Defense Evasion", - "notes": "Based on ATT&CK technique T1574.002. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "defense_evasion", - "dll_sideloading", - "virtualbox", - "dragonforce", - "T1574.002" - ], - "techniques": [ - "T1574.002" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "Badger", - "link": "" - }, - "why": "- DLL side-loading allows attackers to bypass application control and security monitoring by leveraging the trust placed in legitimate, signed executables like VirtualBox/DbgView, making malicious activity appear benign\n- This technique provides attackers with privileged code execution context inherited from the trusted VirtualBox process, enabling them to perform reconnaissance, download additional tools, and establish persistence while evading detection\n- The DragonForce ransomware group (Hackledorb) weaponized this technique as part of a sophisticated multi-month intrusion that ultimately led to data exfiltration and encryption, demonstrating how DLL side-loading serves as a critical enabler for advanced persistent threats\n- Detecting this behavior is particularly important because it represents an early-stage defense evasion technique that, if caught, can prevent the deployment of more sophisticated tools like Backdoor.Turn and the eventual ransomware payload", - "references": "- [MITRE ATT&CK T1574.002 - Hijack Execution Flow: DLL Side-Loading](https://attack.mitre.org/techniques/T1574/002/)\n- [Source CTI Report - Hidden in Teams: DragonForce Attackers Weaponize Microsoft Teams Relays to Stay Hidden](https://symantec-enterprise-blogs.security.com/threat-intelligence/dragonforce-msteams-backdoor)", - "file_path": "Flames/H-2026-001.md" - }, - { - "id": "H-2026-002", - "category": "Flames", - "title": "Adversaries are creating domain-level content compliance rules with regular expression patterns matching strategic intelligence keywords to silently BCC-forward emails to external attacker-controlled Gmail accounts for covert data exfiltration.", - "tactic": "Collection", - "notes": "Based on ATT&CK technique T1114.003. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "collection", - "email", - "china", - "unc6508", - "T1114.003" - ], - "techniques": [ - "T1114.003" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "T3chn3", - "link": "" - }, - "why": "- This technique represents a novel and highly effective method for persistent, automated data exfiltration that operates silently without user awareness or typical email forwarding indicators\n- Content compliance rules are legitimate administrative features that bypass traditional email security controls and DLP solutions, making detection significantly more challenging than standard email forwarding rules\n- UNC6508 successfully used this technique to exfiltrate strategic intelligence on defense operations, AI research, military health, and Indo-Pacific command for over a year undetected, demonstrating its effectiveness for long-term espionage campaigns\n- The technique provides continuous, real-time access to sensitive communications matching specific intelligence requirements without requiring repeated interactive access to compromised accounts\n- Detection of unauthorized compliance rules can reveal compromised administrator accounts and prevent ongoing data loss across entire organizational units rather than individual mailboxes", - "references": "- [MITRE ATT&CK T1114.003 - Email Collection: Email Forwarding Rule](https://attack.mitre.org/techniques/T1114/003/)\n- [Source CTI Report](https://cloud.google.com/blog/topics/threat-intelligence/prc-targets-us-medical-research)", - "file_path": "Flames/H-2026-002.md" - }, - { - "id": "H-2026-003", - "category": "Flames", - "title": "Threat actors are exfiltrating API credentials stored in JetBrains IDE plugin settings by transmitting them unencrypted over HTTP to a hardcoded command and control server at 39.107.60[.]51 immediately after users click \"Apply\" in the plugin configuration interface.", - "tactic": "Credential Access", - "notes": "Based on ATT&CK technique T1552.001. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "credential_access", - "jetbrains", - "api_keys", - "T1552.001" - ], - "techniques": [ - "T1552.001" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "Shilpa Merlin Joy", - "link": "https://github.com/shilpamerlin/" - }, - "why": "- This campaign demonstrates a novel supply chain attack vector targeting developer tools, with at least 15 malicious plugins accumulating nearly 70,000 installations across the JetBrains Marketplace\n- Stolen AI API keys provide attackers with immediate access to paid AI services, enabling both direct financial fraud and potential data exfiltration through AI model interactions\n- The unencrypted HTTP transmission to a hardcoded IP address creates a highly detectable network signature that can identify compromised developer workstations before credentials are monetized\n- Detection of this behavior protects high-value AI API credentials that could be resold or used to conduct further attacks, as evidenced by the campaign's suspected credential redistribution scheme to paid plugin users", - "references": "- [MITRE ATT&CK T1552.001 - Unsecured Credentials: Credentials In Files](https://attack.mitre.org/techniques/T1552/001/)\n- [Source CTI Report](https://www.bleepingcomputer.com/news/security/malicious-jetbrains-marketplace-plugins-steal-ai-api-keys-from-developers/)", - "file_path": "Flames/H-2026-003.md" - }, - { - "id": "H-2026-004", - "category": "Flames", - "title": "Adversaries are sending crafted requests to bypass peering authentication mechanisms on Cisco Catalyst SD-WAN Controllers, Managers, and Validators to establish unauthorized control connections with state \"up\" and zero challenge-ack values in connection statistics, enabling administrative access to NETCONF for SD-WAN fabric manipulation.", - "tactic": "Defense Evasion", - "notes": "Based on ATT&CK technique T1562.006. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", - "tags": [ - "defense_evasion", - "sdwan", - "cisco", - "authentication_bypass", - "T1562.006" - ], - "techniques": [ - "T1562.006" - ], - "severity": null, - "status": "current", - "related_hunt_ids": [], - "submitter": { - "name": "young6x7", - "link": "" - }, - "why": "- This authentication bypass vulnerability (CVE-2026-20182) allows unauthenticated remote attackers to gain high-privileged administrative access to SD-WAN infrastructure, representing a critical security control failure\n- Successful exploitation enables attackers to manipulate network configurations across the entire SD-WAN fabric through NETCONF access, potentially affecting routing, security policies, and network segmentation\n- Cisco PSIRT confirmed limited active exploitation in May 2026, making this an immediate threat requiring detection capabilities beyond signature-based approaches\n- The specific indicator of \"state:up\" with \"challenge-ack 0\" in control connection statistics provides a concrete, observable artifact that distinguishes malicious authentication bypass from legitimate peering events\n- Internet-exposed SD-WAN controllers are at highest risk, and the vulnerability affects all deployment types including on-premises, cloud-managed, and government implementations", - "references": "- [MITRE ATT&CK T1562.006 - Impair Defenses: Indicator Blocking](https://attack.mitre.org/techniques/T1562/006/)\n- [Source CTI Report](https://sec.cloudapps.cisco.com/security/center/content/CiscoSecurityAdvisory/cisco-sa-sdwan-rpa2-v69WY2SW)", - "file_path": "Flames/H-2026-004.md" - }, { "id": "H001", "category": "Flames", @@ -6589,6 +6482,113 @@ "references": "- [MITRE ATT&CK T1195.002 — Supply Chain Compromise: Compromise Software Supply Chain](https://attack.mitre.org/techniques/T1195/002/)\n- [MITRE ATT&CK T1059 — Command and Scripting Interpreter](https://attack.mitre.org/techniques/T1059/)\n- [source report — Microsoft: Mastra npm supply chain compromise (Sapphire Sleet)](https://www.microsoft.com/en-us/security/blog/2026/06/17/postinstall-payload-inside-mastra-npm-supply-chain-compromise/)\n- [Splunk — Defending Against npm Supply Chain Attacks: Detection, Emulation, and Analysis (SPL for npm lifecycle script abuse)](https://www.splunk.com/en_us/blog/security/npm-supply-chain-attack-detection-analysis.html)\n- [Undercode Testing — Hunt Down Malicious npm Packages: a KQL Masterclass (node/npm lineage + lifecycle hooks)](https://undercodetesting.com/unmask-the-invisible-hunt-down-malicious-npm-packages-with-this-kql-masterclass/)\n- [Microsoft — Mitigating the Axios npm supply chain compromise (postinstall dropper, %PROGRAMDATA% LOLBin staging)](https://www.microsoft.com/en-us/security/blog/2026/04/01/mitigating-the-axios-npm-supply-chain-compromise/)\n- [NVISO — The Axios npm supply chain incident: fake dependency, real backdoor (install-time behavioral detection)](https://blog.nviso.eu/2026/04/03/the-axios-npm-supply-chain-incident-fake-dependency-real-backdoor/)", "file_path": "Flames/H199.md" }, + { + "id": "H200", + "category": "Flames", + "title": "Threat actors are using DLL side-loading to force legitimate signed VirtualBox executables (DbgView.exe) to load malicious DLLs (vboxrt.dll) that download additional malicious code from attacker-controlled servers to evade security monitoring and achieve privileged code execution.", + "tactic": "Defense Evasion", + "notes": "Based on ATT&CK technique T1574.002. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "defense_evasion", + "dll_sideloading", + "virtualbox", + "dragonforce", + "T1574.002" + ], + "techniques": [ + "T1574.002" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "Badger", + "link": "" + }, + "why": "- DLL side-loading allows attackers to bypass application control and security monitoring by leveraging the trust placed in legitimate, signed executables like VirtualBox/DbgView, making malicious activity appear benign\n- This technique provides attackers with privileged code execution context inherited from the trusted VirtualBox process, enabling them to perform reconnaissance, download additional tools, and establish persistence while evading detection\n- The DragonForce ransomware group (Hackledorb) weaponized this technique as part of a sophisticated multi-month intrusion that ultimately led to data exfiltration and encryption, demonstrating how DLL side-loading serves as a critical enabler for advanced persistent threats\n- Detecting this behavior is particularly important because it represents an early-stage defense evasion technique that, if caught, can prevent the deployment of more sophisticated tools like Backdoor.Turn and the eventual ransomware payload", + "references": "- [MITRE ATT&CK T1574.002 - Hijack Execution Flow: DLL Side-Loading](https://attack.mitre.org/techniques/T1574/002/)\n- [Source CTI Report - Hidden in Teams: DragonForce Attackers Weaponize Microsoft Teams Relays to Stay Hidden](https://symantec-enterprise-blogs.security.com/threat-intelligence/dragonforce-msteams-backdoor)", + "file_path": "Flames/H200.md" + }, + { + "id": "H201", + "category": "Flames", + "title": "Adversaries are creating domain-level content compliance rules with regular expression patterns matching strategic intelligence keywords to silently BCC-forward emails to external attacker-controlled Gmail accounts for covert data exfiltration.", + "tactic": "Collection", + "notes": "Based on ATT&CK technique T1114.003. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "collection", + "email", + "china", + "unc6508", + "T1114.003" + ], + "techniques": [ + "T1114.003" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "T3chn3", + "link": "" + }, + "why": "- This technique represents a novel and highly effective method for persistent, automated data exfiltration that operates silently without user awareness or typical email forwarding indicators\n- Content compliance rules are legitimate administrative features that bypass traditional email security controls and DLP solutions, making detection significantly more challenging than standard email forwarding rules\n- UNC6508 successfully used this technique to exfiltrate strategic intelligence on defense operations, AI research, military health, and Indo-Pacific command for over a year undetected, demonstrating its effectiveness for long-term espionage campaigns\n- The technique provides continuous, real-time access to sensitive communications matching specific intelligence requirements without requiring repeated interactive access to compromised accounts\n- Detection of unauthorized compliance rules can reveal compromised administrator accounts and prevent ongoing data loss across entire organizational units rather than individual mailboxes", + "references": "- [MITRE ATT&CK T1114.003 - Email Collection: Email Forwarding Rule](https://attack.mitre.org/techniques/T1114/003/)\n- [Source CTI Report](https://cloud.google.com/blog/topics/threat-intelligence/prc-targets-us-medical-research)", + "file_path": "Flames/H201.md" + }, + { + "id": "H202", + "category": "Flames", + "title": "Threat actors are exfiltrating API credentials stored in JetBrains IDE plugin settings by transmitting them unencrypted over HTTP to a hardcoded command and control server at 39.107.60[.]51 immediately after users click \"Apply\" in the plugin configuration interface.", + "tactic": "Credential Access", + "notes": "Based on ATT&CK technique T1552.001. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "credential_access", + "jetbrains", + "api_keys", + "T1552.001" + ], + "techniques": [ + "T1552.001" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "Shilpa Merlin Joy", + "link": "https://github.com/shilpamerlin/" + }, + "why": "- This campaign demonstrates a novel supply chain attack vector targeting developer tools, with at least 15 malicious plugins accumulating nearly 70,000 installations across the JetBrains Marketplace\n- Stolen AI API keys provide attackers with immediate access to paid AI services, enabling both direct financial fraud and potential data exfiltration through AI model interactions\n- The unencrypted HTTP transmission to a hardcoded IP address creates a highly detectable network signature that can identify compromised developer workstations before credentials are monetized\n- Detection of this behavior protects high-value AI API credentials that could be resold or used to conduct further attacks, as evidenced by the campaign's suspected credential redistribution scheme to paid plugin users", + "references": "- [MITRE ATT&CK T1552.001 - Unsecured Credentials: Credentials In Files](https://attack.mitre.org/techniques/T1552/001/)\n- [Source CTI Report](https://www.bleepingcomputer.com/news/security/malicious-jetbrains-marketplace-plugins-steal-ai-api-keys-from-developers/)", + "file_path": "Flames/H202.md" + }, + { + "id": "H203", + "category": "Flames", + "title": "Adversaries are sending crafted requests to bypass peering authentication mechanisms on Cisco Catalyst SD-WAN Controllers, Managers, and Validators to establish unauthorized control connections with state \"up\" and zero challenge-ack values in connection statistics, enabling administrative access to NETCONF for SD-WAN fabric manipulation.", + "tactic": "Defense Evasion", + "notes": "Based on ATT&CK technique T1562.006. Generated by [hearth-auto-intel](https://github.com/THORCollective/HEARTH).", + "tags": [ + "defense_evasion", + "sdwan", + "cisco", + "authentication_bypass", + "T1562.006" + ], + "techniques": [ + "T1562.006" + ], + "severity": null, + "status": "current", + "related_hunt_ids": [], + "submitter": { + "name": "young6x7", + "link": "" + }, + "why": "- This authentication bypass vulnerability (CVE-2026-20182) allows unauthenticated remote attackers to gain high-privileged administrative access to SD-WAN infrastructure, representing a critical security control failure\n- Successful exploitation enables attackers to manipulate network configurations across the entire SD-WAN fabric through NETCONF access, potentially affecting routing, security policies, and network segmentation\n- Cisco PSIRT confirmed limited active exploitation in May 2026, making this an immediate threat requiring detection capabilities beyond signature-based approaches\n- The specific indicator of \"state:up\" with \"challenge-ack 0\" in control connection statistics provides a concrete, observable artifact that distinguishes malicious authentication bypass from legitimate peering events\n- Internet-exposed SD-WAN controllers are at highest risk, and the vulnerability affects all deployment types including on-premises, cloud-managed, and government implementations", + "references": "- [MITRE ATT&CK T1562.006 - Impair Defenses: Indicator Blocking](https://attack.mitre.org/techniques/T1562/006/)\n- [Source CTI Report](https://sec.cloudapps.cisco.com/security/center/content/CiscoSecurityAdvisory/cisco-sa-sdwan-rpa2-v69WY2SW)", + "file_path": "Flames/H203.md" + }, { "id": "M001", "category": "Alchemy", diff --git a/scripts/generate_from_cti.py b/scripts/generate_from_cti.py index e791713d..50a00fb7 100644 --- a/scripts/generate_from_cti.py +++ b/scripts/generate_from_cti.py @@ -2,11 +2,11 @@ from pathlib import Path from dotenv import load_dotenv from PyPDF2 import PdfReader -from datetime import datetime # Add Anthropic (Claude) support try: import anthropic + CLAUDE_AVAILABLE = True except ImportError: CLAUDE_AVAILABLE = False @@ -14,6 +14,7 @@ # Import MITRE ATT&CK data try: from mitre_attack import get_mitre_attack + MITRE_AVAILABLE = True except ImportError: print("⚠️ MITRE ATT&CK data not available. Using fallback tactic matching.") @@ -22,6 +23,7 @@ # Import duplicate detection try: from duplicate_detection import check_duplicates_for_new_submission + DUPLICATE_DETECTION_AVAILABLE = True except ImportError: DUPLICATE_DETECTION_AVAILABLE = False @@ -36,17 +38,20 @@ if AI_PROVIDER == "claude": if not CLAUDE_AVAILABLE: - raise ImportError("Anthropic (Claude) client not installed. Please install 'anthropic' Python package.") + raise ImportError( + "Anthropic (Claude) client not installed. Please install 'anthropic' Python package." + ) ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") if not ANTHROPIC_API_KEY: raise ValueError("ANTHROPIC_API_KEY not set in environment.") anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY) else: from openai import OpenAI + client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) CTI_INPUT_DIR = Path(".hearth/intel-drops/") -OUTPUT_DIR = Path("Flames/") +OUTPUT_DIR = Path("Flames/") PROCESSED_DIR = Path(".hearth/processed-intel-drops/") OUTPUT_DIR.mkdir(parents=True, exist_ok=True) PROCESSED_DIR.mkdir(parents=True, exist_ok=True) @@ -63,7 +68,7 @@ def extract_technique_and_tactic(content: str) -> tuple: import re # Try to extract technique ID from content - technique_pattern = r'T\d{4}(?:\.\d{3})?' + technique_pattern = r"T\d{4}(?:\.\d{3})?" techniques_found = re.findall(technique_pattern, content) if techniques_found and MITRE_AVAILABLE: @@ -79,12 +84,14 @@ def extract_technique_and_tactic(content: str) -> tuple: return (tech_id, tactic, 1.0) # 100% confidence from MITRE # Fallback: Extract from table in generated content - lines = content.split('\n') + lines = content.split("\n") for line in lines: - if '|' in line: - parts = [p.strip() for p in line.split('|')] + if "|" in line: + parts = [p.strip() for p in line.split("|")] if len(parts) >= 4: - potential_tactic = parts[2].strip() if len(parts) > 2 and parts[2].strip() else None + potential_tactic = ( + parts[2].strip() if len(parts) > 2 and parts[2].strip() else None + ) if potential_tactic and potential_tactic.lower() != "tactic": # Validate tactic name if MITRE available if MITRE_AVAILABLE: @@ -92,37 +99,59 @@ def extract_technique_and_tactic(content: str) -> tuple: # Normalize tactic name for mitre_tactic in mitre.tactics.keys(): if mitre_tactic.lower() == potential_tactic.lower(): - return (None, mitre_tactic, 0.8) # 80% confidence from table + return ( + None, + mitre_tactic, + 0.8, + ) # 80% confidence from table return (None, potential_tactic, 0.7) # 70% confidence from table # Final fallback: Keyword-based inference (low confidence) - hypothesis = content.split('\n')[0].strip() + hypothesis = content.split("\n")[0].strip() hypothesis_lower = hypothesis.lower() - if any(word in hypothesis_lower for word in ['tunnel', 'proxy', 'socks', 'c2', 'command', 'control', 'communication']): + if any( + word in hypothesis_lower + for word in [ + "tunnel", + "proxy", + "socks", + "c2", + "command", + "control", + "communication", + ] + ): return (None, "Command And Control", 0.3) - elif any(word in hypothesis_lower for word in ['download', 'execute', 'run', 'launch', 'powershell', 'cmd']): + elif any( + word in hypothesis_lower + for word in ["download", "execute", "run", "launch", "powershell", "cmd"] + ): return (None, "Execution", 0.3) - elif any(word in hypothesis_lower for word in ['persist', 'startup', 'service', 'registry', 'scheduled']): + elif any( + word in hypothesis_lower + for word in ["persist", "startup", "service", "registry", "scheduled"] + ): return (None, "Persistence", 0.3) - elif any(word in hypothesis_lower for word in ['credential', 'password', 'token', 'hash', 'mimikatz']): + elif any( + word in hypothesis_lower + for word in ["credential", "password", "token", "hash", "mimikatz"] + ): return (None, "Credential Access", 0.3) else: return (None, "Command And Control", 0.2) # Default fallback def get_next_hunt_id(): - """Scans the Flames/ directory to find the next available hunt ID.""" + """Next Flames hunt number: max existing ``HNNN`` + 1.""" flames_dir = Path("Flames/") flames_dir.mkdir(exist_ok=True) max_id = 0 - hunt_pattern = re.compile(r"H-\d{4}-(\d{3,})\.md") - for f in flames_dir.glob("H-*.md"): - match = hunt_pattern.match(f.name) + hunt_pattern = re.compile(r"^H(\d+)$") + for f in flames_dir.glob("H*.md"): + match = hunt_pattern.match(f.stem) if match: - current_id = int(match.group(1)) - if current_id > max_id: - max_id = current_id + max_id = max(max_id, int(match.group(1))) return max_id + 1 @@ -198,7 +227,7 @@ def get_next_hunt_id(): def summarize_cti_with_map_reduce(text, model="gpt-4", max_tokens=128000): """ - Summarizes long text by splitting it into chunks, summarizing each, + Summarizes long text by splitting it into chunks, summarizing each, and then creating a final summary of the summaries. This is a 'map-reduce' approach to handle large contexts. """ @@ -209,7 +238,9 @@ def summarize_cti_with_map_reduce(text, model="gpt-4", max_tokens=128000): print("✅ CTI content is within the context window. No summarization needed.") return text - print(f"⚠️ CTI content is too long ({int(text_token_count)} tokens). Starting map-reduce summarization.") + print( + f"⚠️ CTI content is too long ({int(text_token_count)} tokens). Starting map-reduce summarization." + ) # 1. Map: Split the document into overlapping chunks chunk_size = int(max_tokens * 0.6) # Use 60% of the model's context for each chunk @@ -227,7 +258,7 @@ def summarize_cti_with_map_reduce(text, model="gpt-4", max_tokens=128000): # 2. Summarize each chunk chunk_summaries = [] for i, chunk in enumerate(chunks): - print(f"Summarizing chunk {i +1}/{len(chunks)}...") + print(f"Summarizing chunk {i + 1}/{len(chunks)}...") try: if AI_PROVIDER == "claude": # Claude prompt format: system prompt, then user content @@ -236,33 +267,36 @@ def summarize_cti_with_map_reduce(text, model="gpt-4", max_tokens=128000): "Extract the key actionable intelligence from this section. " "Focus on specific tools, techniques, vulnerabilities, and adversary procedures. " "Your output will be combined with others, so be concise and clear.\n\n" - f"--- CHUNK {i +1}/{len(chunks)} ---\n\n{chunk}\n\nAssistant:" + f"--- CHUNK {i + 1}/{len(chunks)} ---\n\n{chunk}\n\nAssistant:" ) response = anthropic_client.messages.create( model=CLAUDE_MODEL, max_tokens=1024, temperature=0.2, - messages=[{"role": "user", "content": prompt}] + messages=[{"role": "user", "content": prompt}], ) summary = response.content[0].text.strip() else: response = client.chat.completions.create( model=model, - messages=[{"role": "user", "content": - "This is one part of a larger threat intelligence report. " - "Extract the key actionable intelligence from this section. " - "Focus on specific tools, techniques, vulnerabilities, and adversary procedures. " - "Your output will be combined with others, so be concise and clear.\n\n" - f"--- CHUNK {i +1}/{len(chunks)} ---\n\n{chunk}" - }], + messages=[ + { + "role": "user", + "content": "This is one part of a larger threat intelligence report. " + "Extract the key actionable intelligence from this section. " + "Focus on specific tools, techniques, vulnerabilities, and adversary procedures. " + "Your output will be combined with others, so be concise and clear.\n\n" + f"--- CHUNK {i + 1}/{len(chunks)} ---\n\n{chunk}", + } + ], temperature=0.2, ) summary = response.choices[0].message.content.strip() chunk_summaries.append(summary) except Exception as e: - print(f"❌ Error summarizing chunk {i +1}: {e}") + print(f"❌ Error summarizing chunk {i + 1}: {e}") # If a chunk fails, we just add a note and continue - chunk_summaries.append(f"[Could not summarize chunk {i +1}]") + chunk_summaries.append(f"[Could not summarize chunk {i + 1}]") # 3. Reduce: Create a final summary from the individual summaries print("Creating final summary of all chunks...") @@ -281,19 +315,22 @@ def summarize_cti_with_map_reduce(text, model="gpt-4", max_tokens=128000): model=CLAUDE_MODEL, max_tokens=2048, temperature=0.2, - messages=[{"role": "user", "content": prompt}] + messages=[{"role": "user", "content": prompt}], ) return final_response.content[0].text.strip() else: final_response = client.chat.completions.create( model=model, - messages=[{"role": "user", "content": - "The following are summaries of different parts of a long threat intelligence report. " - "Synthesize them into a single, coherent, and actionable report. " - "Remove redundancy and create a clear narrative of the adversary's actions. " - "The final output should be a comprehensive summary that can be used to generate a threat hunt.\n\n" - f"--- COMBINED SUMMARIES ---\n\n{combined_summary}" - }], + messages=[ + { + "role": "user", + "content": "The following are summaries of different parts of a long threat intelligence report. " + "Synthesize them into a single, coherent, and actionable report. " + "Remove redundancy and create a clear narrative of the adversary's actions. " + "The final output should be a comprehensive summary that can be used to generate a threat hunt.\n\n" + f"--- COMBINED SUMMARIES ---\n\n{combined_summary}", + } + ], temperature=0.2, ) return final_response.choices[0].message.content.strip() @@ -320,8 +357,14 @@ def cleanup_hunt_body(ai_content): # These are keywords we want to strip out if they appear before the hypothesis. is_unwanted_prefix = any( - stripped_line.lower().startswith(prefix) for prefix in - ['cti report:', 'hypothesis:', '---', 'instructions:', 'your output should'] + stripped_line.lower().startswith(prefix) + for prefix in [ + "cti report:", + "hypothesis:", + "---", + "instructions:", + "your output should", + ] ) if not is_unwanted_prefix: @@ -335,12 +378,18 @@ def cleanup_hunt_body(ai_content): # The hypothesis might have a "Hypothesis:" label. Let's remove that specifically. first_line = lines[first_content_index] if "hypothesis:" in first_line.lower(): - lines[first_content_index] = first_line.split(':', 1)[-1].strip() + lines[first_content_index] = first_line.split(":", 1)[-1].strip() return "\n".join(lines[first_content_index:]).strip() -def generate_hunt_content(cti_text, cti_source_url, submitter_credit, is_regeneration=False, user_feedback=None): +def generate_hunt_content( + cti_text, + cti_source_url, + submitter_credit, + is_regeneration=False, + user_feedback=None, +): """Generate hunt content from CTI text.""" try: print("Starting CTI summarization...") @@ -370,7 +419,7 @@ def generate_hunt_content(cti_text, cti_source_url, submitter_credit, is_regener regeneration_instruction=regeneration_instruction, cti_text=cti_text, cti_source_url=cti_source_url, - submitter_credit=submitter_credit + submitter_credit=submitter_credit, ) if AI_PROVIDER == "claude": full_prompt = f"\n\nHuman: {SYSTEM_PROMPT}\n\n{prompt}\n\nAssistant:" @@ -378,16 +427,18 @@ def generate_hunt_content(cti_text, cti_source_url, submitter_credit, is_regener model=CLAUDE_MODEL, max_tokens=1200, temperature=temperature, - messages=[{"role": "user", "content": full_prompt}] + messages=[{"role": "user", "content": full_prompt}], ) return response.content[0].text.strip() else: response = client.chat.completions.create( model="gpt-4", - messages=[{"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": prompt}], + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ], temperature=temperature, - max_tokens=800 + max_tokens=800, ) return response.choices[0].message.content.strip() except Exception as e: @@ -397,7 +448,7 @@ def generate_hunt_content(cti_text, cti_source_url, submitter_credit, is_regener def read_file_content(file_path): """Read content from either PDF or text file.""" - if file_path.suffix.lower() == '.pdf': + if file_path.suffix.lower() == ".pdf": try: reader = PdfReader(file_path) text = "" @@ -424,25 +475,8 @@ def read_file_content(file_path): hunt_id = out_md_path.stem print(f"🔄 Regenerating hunt for {hunt_id} at {out_md_path}") else: - # Determine the next hunt number - hunt_files = list(Path(".").glob("Flames/H*.md")) - if hunt_files: - # Filter files that have the expected format and extract numbers - hunt_numbers = [] - for f in hunt_files: - parts = f.stem.split('-') - if len(parts) >= 3 and parts[-1].isdigit(): - hunt_numbers.append(int(parts[-1])) - - if hunt_numbers: - next_hunt_num = max(hunt_numbers) + 1 - else: - next_hunt_num = 1 - else: - next_hunt_num = 1 - - year = datetime.now().year - hunt_id = f"H-{year}-{next_hunt_num:03d}" + # Determine the next hunt number (HNNN, continuing the Flames sequence) + hunt_id = f"H{get_next_hunt_id():03d}" out_md_path = Path(f"Flames/{hunt_id}.md") print(f"🌱 Generating new hunt: {hunt_id}") @@ -455,7 +489,9 @@ def read_file_content(file_path): # SECURELY get CTI content from the file prepared by the workflow cti_files = list(CTI_INPUT_DIR.glob("*")) if not cti_files: - raise FileNotFoundError(f"❌ No CTI file found in the input directory '{CTI_INPUT_DIR}'.") + raise FileNotFoundError( + f"❌ No CTI file found in the input directory '{CTI_INPUT_DIR}'." + ) cti_file_path = cti_files[0] print(f"📄 Processing CTI file: {cti_file_path}") @@ -479,7 +515,7 @@ def read_file_content(file_path): cti_source_url, submitter_credit, is_regeneration=is_regeneration, - user_feedback=user_feedback + user_feedback=user_feedback, ) if hunt_body: @@ -487,14 +523,16 @@ def read_file_content(file_path): cleaned_body = cleanup_hunt_body(hunt_body) # 3. Extract the hypothesis (first line of cleaned content) - hypothesis = cleaned_body.split('\n')[0].strip() - if hypothesis.startswith('#'): + hypothesis = cleaned_body.split("\n")[0].strip() + if hypothesis.startswith("#"): # Remove markdown headers - hypothesis = hypothesis.lstrip('#').strip() + hypothesis = hypothesis.lstrip("#").strip() # 4. Construct the final markdown content final_content = f"# {hunt_id}\n\n" - final_content += cleaned_body.replace("| [Leave blank] |", f"| {hunt_id} |") + final_content += cleaned_body.replace( + "| [Leave blank] |", f"| {hunt_id} |" + ) # 5. Save the hunt file with open(out_md_path, "w") as f: @@ -502,24 +540,24 @@ def read_file_content(file_path): print(f"✅ Successfully wrote hunt to {out_md_path}") # 6. Set the output for the GitHub Action - if 'GITHUB_OUTPUT' in os.environ: - with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - print(f'HUNT_FILE_PATH={out_md_path}', file=f) - print(f'HUNT_ID={hunt_id}', file=f) - print('HYPOTHESIS< int | None: - """Return the numeric part of an ``H-YYYY-NNN`` stem, or None if it isn't one.""" + """Return the numeric part of an ``HNNN`` stem, or None if it isn't one.""" match = HUNT_STEM_RE.match(stem) - return int(match.group(2)) if match else None + return int(match.group(1)) if match else None def existing_numbers(names: Iterable[str]) -> set[int]: - """Collect the numeric IDs from an iterable of ``H-YYYY-NNN(.md)`` names/paths.""" + """Collect the numeric IDs from an iterable of ``HNNN(.md)`` names/paths.""" nums: set[int] = set() for name in names: num = parse_hunt_number(Path(name).stem) @@ -39,8 +39,8 @@ def next_free_number(existing: set[int]) -> int: return max(existing) + 1 if existing else 1 -def format_hunt_id(year: int, num: int) -> str: - return f"H-{year}-{num:03d}" +def format_hunt_id(num: int) -> str: + return f"H{num:03d}" def rewrite_hunt_id(path: Path, new_id: str) -> Path: @@ -59,7 +59,7 @@ def rewrite_hunt_id(path: Path, new_id: str) -> Path: lines[0] = f"# {new_id}" text = "\n".join(lines) - # Replace a populated Hunt# cell (e.g. "| H-2026-001 |"); a no-op for the + # Replace a populated Hunt# cell (e.g. "| H200 |"); a no-op for the # common case where generated files leave that cell empty. text = re.sub(rf"\|\s*{re.escape(old_id)}\s*\|", f"| {new_id} |", text, count=1) diff --git a/scripts/process_hunt_submission.py b/scripts/process_hunt_submission.py index 9226c44c..3b8a26d6 100644 --- a/scripts/process_hunt_submission.py +++ b/scripts/process_hunt_submission.py @@ -1,8 +1,8 @@ import os +import re from pathlib import Path from openai import OpenAI from dotenv import load_dotenv -from datetime import datetime load_dotenv() client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) @@ -57,29 +57,29 @@ def parse_issue_body(body): """Parses the structured data from the HEARTH Hunt Submission Form issue.""" details = {} - sections = body.split('###')[1:] + sections = body.split("###")[1:] for section in sections: try: - lines = section.strip().split('\n') + lines = section.strip().split("\n") header = lines[0].strip().lower() content = "\n".join(lines[1:]).strip() if "hunt type" in header: - details['hunt_type'] = content + details["hunt_type"] = content elif "hunt idea / hypothesis" in header: - details['hypothesis'] = content + details["hypothesis"] = content elif "mitre att&ck tactic" in header: - details['tactic'] = content + details["tactic"] = content elif "implementation notes" in header: - details['notes'] = content + details["notes"] = content elif "search tags" in header: - details['tags'] = content + details["tags"] = content elif "value and impact" in header: - details['why'] = content + details["why"] = content elif "knowledge base" in header: - details['references'] = content + details["references"] = content elif "hearth crafter" in header: - details['submitter'] = content + details["submitter"] = content except IndexError: continue # Ignore malformed sections return details @@ -88,37 +88,37 @@ def parse_issue_body(body): def generate_hunt_file(details): """Generates the hunt file content using the AI.""" prompt = USER_TEMPLATE.format( - hunt_type=details.get('hunt_type', 'Flames'), - hypothesis=details.get('hypothesis', ''), - tactic=details.get('tactic', ''), - notes=details.get('notes', 'N/A'), - tags=details.get('tags', ''), - why=details.get('why', ''), - references=details.get('references', ''), - submitter=details.get('submitter', 'A Helpful Contributor') + hunt_type=details.get("hunt_type", "Flames"), + hypothesis=details.get("hypothesis", ""), + tactic=details.get("tactic", ""), + notes=details.get("notes", "N/A"), + tags=details.get("tags", ""), + why=details.get("why", ""), + references=details.get("references", ""), + submitter=details.get("submitter", "A Helpful Contributor"), ) response = client.chat.completions.create( model="gpt-4", messages=[ {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": prompt} + {"role": "user", "content": prompt}, ], temperature=0.1, - max_tokens=1200 + max_tokens=1200, ) return response.choices[0].message.content.strip() def get_next_hunt_id(hunt_type_prefix, hunt_dir): - """Determines the next hunt ID for a given type.""" - hunt_files = list(Path(hunt_dir).glob(f"{hunt_type_prefix}*.md")) - next_hunt_num = 1 - if hunt_files: - hunt_numbers = [int(f.stem.split('-')[-1]) for f in hunt_files if f.stem.split('-')[-1].isdigit()] - if hunt_numbers: - next_hunt_num = max(hunt_numbers) + 1 - return next_hunt_num + """Next number for a prefix: max existing ``NNN`` + 1 (1 if none).""" + stem_re = re.compile(rf"^{re.escape(hunt_type_prefix)}(\d+)$") + numbers = [ + int(m.group(1)) + for f in Path(hunt_dir).glob(f"{hunt_type_prefix}*.md") + if (m := stem_re.match(f.stem)) + ] + return max(numbers) + 1 if numbers else 1 if __name__ == "__main__": @@ -130,22 +130,21 @@ def get_next_hunt_id(hunt_type_prefix, hunt_dir): hunt_details = parse_issue_body(issue_body) # 2. Determine hunt type, prefix, and directory - hunt_type = hunt_details.get('hunt_type', 'Flames').lower() - if 'flames' in hunt_type: - prefix, directory = 'H', 'Flames' - elif 'embers' in hunt_type: - prefix, directory = 'B', 'Embers' - elif 'alchemy' in hunt_type: - prefix, directory = 'A', 'Alchemy' + hunt_type = hunt_details.get("hunt_type", "Flames").lower() + if "flames" in hunt_type: + prefix, directory = "H", "Flames" + elif "embers" in hunt_type: + prefix, directory = "B", "Embers" + elif "alchemy" in hunt_type: + prefix, directory = "M", "Alchemy" else: - prefix, directory = 'H', 'Flames' # Default to Flames + prefix, directory = "H", "Flames" # Default to Flames Path(directory).mkdir(exist_ok=True) - # 3. Determine next hunt ID + # 3. Determine next hunt ID (HNNN / BNNN / MNNN, continuing the sequence) next_id = get_next_hunt_id(prefix, directory) - year = datetime.now().year - hunt_id = f"{prefix}-{year}-{next_id:03d}" + hunt_id = f"{prefix}{next_id:03d}" out_md_path = Path(f"{directory}/{hunt_id}.md") # 4. Generate the core content @@ -161,7 +160,7 @@ def get_next_hunt_id(hunt_type_prefix, hunt_dir): print(f"✅ Successfully wrote hunt to {out_md_path}") # 7. Set output for the workflow - if 'GITHUB_OUTPUT' in os.environ: - with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - print(f'HUNT_FILE_PATH={out_md_path}', file=f) - print(f'HUNT_ID={hunt_id}', file=f) + if "GITHUB_OUTPUT" in os.environ: + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + print(f"HUNT_FILE_PATH={out_md_path}", file=f) + print(f"HUNT_ID={hunt_id}", file=f) diff --git a/scripts/reassign_hunt_id.py b/scripts/reassign_hunt_id.py index 581dc099..73f80a22 100644 --- a/scripts/reassign_hunt_id.py +++ b/scripts/reassign_hunt_id.py @@ -2,7 +2,7 @@ """Reassign a draft hunt's ID if it collides with an ID already on ``main``. Run on a checked-out draft branch with ``origin/main`` fetched. For each hunt -file the branch ADDS under ``Flames/`` whose ``H-YYYY-NNN`` number already +file the branch ADDS under ``Flames/`` whose ``HNNN`` number already exists on main, rename it to the next free number (rewriting the heading and any populated Hunt# cell), and stage the rename. @@ -76,9 +76,8 @@ def main() -> int: continue final_id = path.stem if num in main_nums: - year = int(path.stem.split("-")[1]) new_num = next_free_number(claimed) - new_id = format_hunt_id(year, new_num) + new_id = format_hunt_id(new_num) new_path = rewrite_hunt_id(path, new_id) _git("add", "-A", "--", FLAMES) claimed.add(new_num) diff --git a/scripts/tests/test_hunt_ids.py b/scripts/tests/test_hunt_ids.py index d5719fc4..45314a22 100644 --- a/scripts/tests/test_hunt_ids.py +++ b/scripts/tests/test_hunt_ids.py @@ -10,10 +10,13 @@ def test_parse_hunt_number(): - assert parse_hunt_number("H-2026-007") == 7 - assert parse_hunt_number("H-2026-012") == 12 - assert parse_hunt_number("H001") is None # old namespace ignored - assert parse_hunt_number("B-2026-001") is None # Embers prefix, not H + assert parse_hunt_number("H200") == 200 + assert parse_hunt_number("H001") == 1 + assert parse_hunt_number("B001") is None # Embers prefix, not H + assert parse_hunt_number("M001") is None # Alchemy prefix, not H + assert ( + parse_hunt_number("H-2026-007") is None + ) # legacy year format, no longer minted assert parse_hunt_number("not-an-id") is None @@ -25,8 +28,8 @@ def test_next_free_number(): def test_format_hunt_id(): - assert format_hunt_id(2026, 3) == "H-2026-003" - assert format_hunt_id(2026, 42) == "H-2026-042" + assert format_hunt_id(3) == "H003" + assert format_hunt_id(200) == "H200" def _write_hunt(path: Path, hunt_id: str, hunt_cell: str = "") -> None: @@ -42,52 +45,48 @@ def _write_hunt(path: Path, hunt_id: str, hunt_cell: str = "") -> None: def test_rewrite_hunt_id_renames_and_updates_heading(tmp_path): - src = tmp_path / "H-2026-001.md" - _write_hunt(src, "H-2026-001") # empty Hunt# cell — the real generated format - new_path = rewrite_hunt_id(src, "H-2026-003") - assert new_path.name == "H-2026-003.md" + src = tmp_path / "H200.md" + _write_hunt(src, "H200") # empty Hunt# cell — the real generated format + new_path = rewrite_hunt_id(src, "H202") + assert new_path.name == "H202.md" assert not src.exists() text = new_path.read_text() - assert text.splitlines()[0] == "# H-2026-003" - assert "# H-2026-001" not in text + assert text.splitlines()[0] == "# H202" + assert "# H200" not in text assert "## Why" in text and "## References" in text # body preserved def test_rewrite_hunt_id_updates_populated_table_cell(tmp_path): - src = tmp_path / "H-2026-002.md" - _write_hunt(src, "H-2026-002", hunt_cell="H-2026-002") - new_path = rewrite_hunt_id(src, "H-2026-004") + src = tmp_path / "H201.md" + _write_hunt(src, "H201", hunt_cell="H201") + new_path = rewrite_hunt_id(src, "H203") text = new_path.read_text() - assert "| H-2026-004 |" in text - assert "H-2026-002" not in text + assert "| H203 |" in text + assert "H201" not in text def test_find_id_problems_flags_main_collision(): - added = [("H-2026-002", "H-2026-002")] - problems = find_id_problems( - added, main_ids={"H-2026-001", "H-2026-002"}, all_stems=["H-2026-002"] - ) + added = [("H201", "H201")] + problems = find_id_problems(added, main_ids={"H200", "H201"}, all_stems=["H201"]) assert any("already exists on main" in p for p in problems) def test_find_id_problems_clean_add(): - added = [("H-2026-003", "H-2026-003")] + added = [("H202", "H202")] problems = find_id_problems( added, - main_ids={"H-2026-001", "H-2026-002"}, - all_stems=["H-2026-001", "H-2026-002", "H-2026-003"], + main_ids={"H200", "H201"}, + all_stems=["H200", "H201", "H202"], ) assert problems == [] def test_find_id_problems_heading_mismatch(): - added = [("H-2026-003", "H-2026-002")] - problems = find_id_problems(added, main_ids=set(), all_stems=["H-2026-003"]) + added = [("H202", "H201")] + problems = find_id_problems(added, main_ids=set(), all_stems=["H202"]) assert any("does not match filename" in p for p in problems) def test_find_id_problems_duplicate_in_tree(): - problems = find_id_problems( - [], main_ids=set(), all_stems=["H-2026-001", "H-2026-001"] - ) + problems = find_id_problems([], main_ids=set(), all_stems=["H200", "H200"]) assert any("duplicate" in p.lower() for p in problems)