Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,10 @@ Use `scripts/sri_parser.py` when you need a focused crawl that inventories
"unsafe" Subresource Integrity implementations called out by
[SecurityScorecard's guidance](https://support.securityscorecard.com/hc/en-us/articles/41067186972827-Unsafe-Implementation-of-Subresource-Integrity-SRI).
By default the scanner inspects only the requested page so the results mirror
SecurityScorecard's behaviour. Add the `--crawl` flag to follow same-origin
links, inspect third-party JavaScript and CSS includes across multiple pages,
and report every resource that:
SecurityScorecard's behaviour. The summary also reports how many external
resources already include an `integrity` attribute. Add the `--crawl` flag to
follow same-origin links, inspect third-party JavaScript and CSS includes across
multiple pages, and report every resource that:

- Omits an `integrity` attribute entirely
- Supplies hashes that do not start with `sha256-`, `sha384-`, or `sha512-`
Expand All @@ -173,12 +174,17 @@ you can tell whether a compensating control is in place.
# Human-readable output for the landing page only
python scripts/sri_parser.py https://example.com

# List every external include that already uses SRI
python scripts/sri_parser.py https://example.com --list-sri

# JSON report with a deeper crawl (depth 2, up to 50 pages)
python scripts/sri_parser.py https://example.com --crawl --max-depth 2 --max-pages 50 --json
```

The report lists the affected page, resource URL, integrity/crossorigin values,
and short reason codes for each unsafe include.
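and short reason codes for each unsafe include. When `--list-sri` is supplied, the
human-readable output also enumerates each external script and stylesheet that
defines an `integrity` attribute. With `--json`, the same entries are always
included under the `resources_with_integrity` key, whether or not `--list-sri`
is given.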

## Documentation

Expand Down
55 changes: 50 additions & 5 deletions scripts/sri_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(
self.to_visit: collections.deque[Tuple[str, int]] = collections.deque([(self.base_url, 0)])
self.unsafe_resources: List[UnsafeResource] = []
self.csp_policies: List[Tuple[str, str, str]] = [] # (page_url, header_name, policy_value)
self.resources_with_integrity: List[Dict[str, object]] = []

# ------------------------------------------------------------------
# Crawling helpers
Expand Down Expand Up @@ -140,10 +141,26 @@ def _analyze_resource(self, tag, tag_type: str, page_url: str) -> Optional[Unsaf
parsed = urlparse(resource_url)
reasons: List[str] = []

if not integrity:
reasons.append("missing-integrity")
else:
valid_hashes: List[str] = []
invalid_hashes: List[str] = []

if integrity:
valid_hashes, invalid_hashes = self._parse_integrity_tokens(integrity)
self.resources_with_integrity.append(
{
"page_url": page_url,
"resource_url": resource_url,
"tag_type": tag_type,
"integrity": integrity,
"crossorigin": crossorigin,
"valid_hashes": valid_hashes,
"invalid_hashes": invalid_hashes,
}
)
else:
reasons.append("missing-integrity")

if integrity:
if not valid_hashes:
reasons.append("invalid-integrity-hash")
elif invalid_hashes:
Expand Down Expand Up @@ -262,6 +279,8 @@ def crawl(self) -> Dict[str, object]:
{"page_url": page_url, "header": header, "value": policy}
for page_url, header, policy in self.csp_policies
],
"resources_with_integrity_count": len(self.resources_with_integrity),
"resources_with_integrity": self.resources_with_integrity,
}
return report

Expand Down Expand Up @@ -299,16 +318,25 @@ def build_arg_parser() -> argparse.ArgumentParser:
action="store_true",
help="Output the report as JSON instead of human-readable text",
)
parser.add_argument(
"--list-sri",
action="store_true",
help="List all external resources that include an integrity attribute",
)
return parser


def print_report(report: Dict[str, object], as_json: bool = False) -> None:
def print_report(report: Dict[str, object], as_json: bool = False, list_all: bool = False) -> None:
if as_json:
print(json.dumps(report, indent=2))
return

print(f"SRI Parser report for {report['base_url']}")
print(f"Pages crawled: {report['pages_crawled']}")
print(
"SRI resources with integrity attribute: "
f"{report['resources_with_integrity_count']}"
)
print()

if report["compensating_control_detected"]:
Expand Down Expand Up @@ -336,6 +364,23 @@ def print_report(report: Dict[str, object], as_json: bool = False) -> None:
for entry in report["csp_policies"]:
print(f"- {entry['page_url']} [{entry['header']}] -> {entry['value']}")

if list_all:
print()
if report["resources_with_integrity"]:
print("All resources with integrity attributes:")
for entry in report["resources_with_integrity"]:
valid = ", ".join(entry["valid_hashes"]) or "None"
invalid = ", ".join(entry["invalid_hashes"]) or "None"
print(f"- {entry['resource_url']} ({entry['tag_type']})")
print(f" Page: {entry['page_url']}")
print(f" Integrity: {entry['integrity']}")
print(f" Crossorigin: {entry['crossorigin'] or 'None'}")
print(f" Valid hashes: {valid}")
print(f" Invalid hashes: {invalid}")
print()
else:
print("No resources with integrity attributes detected.")


def main() -> None:
parser = build_arg_parser()
Expand All @@ -348,7 +393,7 @@ def main() -> None:
timeout=args.timeout,
)
report = sri_parser.crawl()
print_report(report, as_json=args.json)
print_report(report, as_json=args.json, list_all=args.list_sri)


if __name__ == "__main__":
Expand Down