-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgaps.py
More file actions
216 lines (177 loc) · 6.72 KB
/
gaps.py
File metadata and controls
216 lines (177 loc) · 6.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# SPDX-FileCopyrightText: 2026 KustoKing / SecM8
# SPDX-License-Identifier: Apache-2.0
"""Compute MITRE ATT&CK technique gaps — what we DON'T cover.
Companion to ``contentops.coverage.report``: that module shows what's
covered (heatmap by tactic), this one shows what's missing.
The reference list of techniques lives at
``contentops/coverage/data/mitre_attack_techniques.json`` (curated
subset — see file note). Operators can substitute their own list
via ``--techniques-file`` to drive against an org-specific threat
model or a fuller MITRE Enterprise STIX-derived export.
A "gap" is a (tactic, technique_id) pair from the reference list
that is NOT referenced by any detection envelope's
``metadata.techniques``.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from importlib import resources
from pathlib import Path
from contentops.coverage.report import ALL_TACTICS, CoverageReport
@dataclass(frozen=True)
class TechniqueRef:
    """A single entry of the technique reference list.

    The dataclass is frozen, so instances cannot be modified after
    construction and compare/hash by field value.
    """

    # Technique identifier: a parent ("T1059") or sub-technique ("T1059.001").
    id: str
    # Human-readable technique name.
    name: str
    # Names of the tactics this technique is listed under.
    tactics: tuple[str, ...]
@dataclass
class TacticGaps:
    """Gap bucket for one tactic: its uncovered techniques plus counters."""

    # Name of the tactic this bucket describes.
    tactic: str
    # Reference techniques under this tactic that nothing covers; every
    # instance gets its own fresh list.
    uncovered: list[TechniqueRef] = field(default_factory=list)
    # How many reference techniques under this tactic are covered.
    covered_count: int = 0
    # How many reference techniques were counted under this tactic overall.
    total_in_tactic: int = 0
@dataclass
class GapsReport:
    """Full result of a gap analysis, as consumed by the renderers."""

    # One bucket per tactic.
    tactics: list[TacticGaps]
    # Human-readable label naming where the reference list came from.
    techniques_source: str
    # Number of (tactic, technique) cells that were evaluated.
    total_techniques: int
    # Number of evaluated cells that turned out to be uncovered.
    total_uncovered: int
# ---------------------------------------------------------------------------
# Loading
# ---------------------------------------------------------------------------
def _default_techniques_path() -> Path:
    """Return the location of the bundled MITRE techniques JSON file.

    The file is looked up inside the ``contentops.coverage.data`` package
    through ``importlib.resources``.
    """
    # ``resources.files`` handles wheel and editable layouts alike; in an
    # editable install the data package is just a directory on disk.
    data_package = resources.files("contentops.coverage.data")
    bundled_json = data_package / "mitre_attack_techniques.json"
    return Path(str(bundled_json))
def _tactics_tuple(value: str | list[str] | None) -> tuple[str, ...]:
    """Coerce a JSON ``"tactics"`` value into a tuple of tactic names."""
    if not value:
        return ()
    if isinstance(value, str):
        # tuple("execution") would explode the name into single characters,
        # so a lone string is treated as a one-element list.
        return (value,)
    return tuple(value)


def load_techniques(path: Path | None = None) -> tuple[list[TechniqueRef], str]:
    """Load the technique reference list from ``path`` or the bundled default.

    Args:
        path: JSON document with a top-level ``"techniques"`` array. Every
            entry needs an ``"id"``; ``"name"`` and ``"tactics"`` are
            optional. A plain string path is accepted as well as a ``Path``.
            ``None`` selects the list bundled with the package.

    Returns:
        ``(techniques, source_label)`` where ``source_label`` is
        ``"custom: <filename>"`` for a caller-supplied file and
        ``"bundled curated list"`` otherwise.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If a technique entry has no ``"id"``.
    """
    if path is not None:
        # Normalise once so a str argument works for the label as well as
        # for reading (``str`` has no ``.name`` attribute).
        target = Path(path)
        label = f"custom: {target.name}"
    else:
        target = _default_techniques_path()
        label = "bundled curated list"

    raw = json.loads(target.read_text(encoding="utf-8"))
    techniques = [
        TechniqueRef(
            id=str(entry["id"]),
            name=str(entry.get("name", "")),
            tactics=_tactics_tuple(entry.get("tactics")),
        )
        for entry in raw.get("techniques", [])
    ]
    return techniques, label
# ---------------------------------------------------------------------------
# Core computation
# ---------------------------------------------------------------------------
def _covered_technique_ids(report: CoverageReport) -> set[str]:
    """Return every technique id the coverage report references.

    Ids may be parents (``T1059``) or sub-techniques (``T1059.001``). A
    referenced sub-technique also marks its parent as covered, so the
    result contains both the id itself and the part before the first dot.
    """
    referenced = {
        tech_id
        for tactic_cov in report.tactics
        for tech_id in tactic_cov.techniques
    }
    # Only dotted ids have a parent to derive.
    parents = {
        tech_id.split(".", 1)[0]
        for tech_id in referenced
        if "." in tech_id
    }
    return referenced | parents
def compute_gaps(
    report: CoverageReport,
    techniques: list[TechniqueRef],
    *,
    techniques_source: str = "",
) -> GapsReport:
    """Work out which (tactic, technique) cells the report leaves uncovered.

    A technique counts as covered when its id is referenced by the report,
    or when the report references one of its sub-techniques.

    Args:
        report: Coverage report whose referenced techniques are the
            "covered" set.
        techniques: Reference list to check against the report.
        techniques_source: Label stored on the result. Defaults to an empty
            string, which matches the previous behaviour; presumably callers
            then fill in ``GapsReport.techniques_source`` themselves.

    Returns:
        A ``GapsReport`` with one bucket per entry of ``ALL_TACTICS`` (in
        that order) and each bucket's uncovered techniques sorted by id.
        ``total_techniques`` and ``total_uncovered`` count (tactic,
        technique) cells, so a technique listed under two tactics
        contributes two cells.
    """
    covered = _covered_technique_ids(report)
    by_tactic: dict[str, TacticGaps] = {
        t: TacticGaps(tactic=t) for t in ALL_TACTICS
    }
    # A gap is a (tactic, technique_id) pair; remember the pairs already
    # counted so a repeated technique or a repeated tactic inside one
    # entry cannot inflate the totals or list the same gap twice.
    seen_cells: set[tuple[str, str]] = set()
    total_techniques = 0
    total_uncovered = 0

    for tech in techniques:
        is_covered = tech.id in covered
        for tactic in tech.tactics:
            bucket = by_tactic.get(tactic)
            if bucket is None:
                # Reference list has a tactic the pipeline doesn't model.
                # Skip rather than silently 'gap' against an unknown tactic.
                continue
            cell = (tactic, tech.id)
            if cell in seen_cells:
                continue
            seen_cells.add(cell)

            bucket.total_in_tactic += 1
            total_techniques += 1
            if is_covered:
                bucket.covered_count += 1
            else:
                bucket.uncovered.append(tech)
                total_uncovered += 1

    # Stable ordering for deterministic output.
    for bucket in by_tactic.values():
        bucket.uncovered.sort(key=lambda t: t.id)

    return GapsReport(
        tactics=[by_tactic[t] for t in ALL_TACTICS],
        techniques_source=techniques_source,
        total_techniques=total_techniques,
        total_uncovered=total_uncovered,
    )
# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
def _md_table_cell(text: str) -> str:
    """Make ``text`` safe to embed inside one cell of a Markdown pipe table."""
    # An unescaped "|" would open a new column and a line break would end
    # the row, so escape the former and flatten the latter to spaces.
    return " ".join(str(text).replace("|", "\\|").splitlines())


def render_markdown(report: GapsReport) -> str:
    """Render the gaps report as a Markdown document.

    The output is a title, the source label, a one-line summary, and a
    table with one row per tactic. Tactics whose ``total_in_tactic`` is 0
    are omitted; a tactic with nothing uncovered shows an em dash in the
    last column.

    Args:
        report: The gaps report to render.

    Returns:
        The Markdown text, terminated by a newline.
    """
    lines: list[str] = [
        "# MITRE ATT&CK Coverage Gaps",
        "",
        f"_Source: {report.techniques_source}_",
        "",
        (
            f"**{report.total_uncovered}** uncovered of "
            f"**{report.total_techniques}** technique(s) in scope."
        ),
        "",
        "| Tactic | Covered | Total | Uncovered techniques |",
        "|---|---:|---:|---|",
    ]
    for tg in report.tactics:
        if tg.total_in_tactic == 0:
            continue
        if tg.uncovered:
            cells = " ".join(
                f"`{_md_table_cell(t.id)}` {_md_table_cell(t.name)}"
                for t in tg.uncovered
            )
        else:
            cells = "—"
        lines.append(
            f"| {_md_table_cell(tg.tactic)} | {tg.covered_count} | "
            f"{tg.total_in_tactic} | {cells} |"
        )
    # The trailing empty element makes the joined text end with a newline.
    lines.append("")
    return "\n".join(lines)
def render_json(report: GapsReport) -> str:
    """Serialise the gaps report as indented JSON followed by a newline.

    Tactics whose ``total_in_tactic`` is not positive are left out. Keys
    are emitted in insertion order rather than sorted.
    """
    tactic_entries = []
    for bucket in report.tactics:
        if bucket.total_in_tactic > 0:
            missing = [
                {"id": tech.id, "name": tech.name}
                for tech in bucket.uncovered
            ]
            tactic_entries.append(
                {
                    "tactic": bucket.tactic,
                    "covered_count": bucket.covered_count,
                    "total_in_tactic": bucket.total_in_tactic,
                    "uncovered": missing,
                }
            )

    document = {
        "techniques_source": report.techniques_source,
        "total_techniques": report.total_techniques,
        "total_uncovered": report.total_uncovered,
        "tactics": tactic_entries,
    }
    serialized = json.dumps(document, indent=2, sort_keys=False)
    return f"{serialized}\n"
# Names exported by ``from ... import *``: the three data classes, the loader,
# the gap computation and both renderers. The underscore-prefixed helpers
# (_default_techniques_path, _covered_technique_ids) are not listed.
__all__ = [
    "TechniqueRef",
    "TacticGaps",
    "GapsReport",
    "load_techniques",
    "compute_gaps",
    "render_markdown",
    "render_json",
]