diff --git a/docs/source/acknowledgement-annotations-examples.md b/docs/source/acknowledgement-annotations-examples.md new file mode 100644 index 00000000..d8076d73 --- /dev/null +++ b/docs/source/acknowledgement-annotations-examples.md @@ -0,0 +1,169 @@ +# Acknowledgement Annotations in SBOM Output + +This document shows how the `acknowledgement` field from `scanoss.json` BOM operations +is exported to CycloneDX and SPDX using their native **annotations** support. + +## scanoss.json (input) + +The `acknowledgement` and `timestamp` fields on BOM entries capture the decision and when it was made: + +```json +{ + "bom": { + "include": [ + { + "path": "src/lib/component.js", + "purl": "pkg:npm/lodash@4.17.21", + "comment": "Vendored copy confirmed", + "acknowledgement": "Confirmed: lodash 4.17.21 vendored under src/lib", + "timestamp": "2026-03-15T10:30:00Z" + } + ], + "replace": [ + { + "path": "src/utils/helper.js", + "purl": "pkg:npm/old-lib@1.0.0", + "replace_with": "pkg:npm/new-lib@2.0.0", + "license": "MIT", + "comment": "Upgrade to newer version", + "acknowledgement": "Verified upstream project is the correct attribution", + "timestamp": "2026-03-10T14:00:00Z" + } + ] + } +} +``` + +## CycloneDX 1.6 export + +Annotations are a **top-level array** in the BOM. Each annotation references components +via `subjects` (using `bom-ref`) and records the annotator as a service. 
+ +Reference: [CycloneDX 1.6 Annotations](https://cyclonedx.org/docs/1.6/json/#annotations) + +```json +{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "version": 1, + "metadata": { + "timestamp": "2026-03-23T12:00:00Z", + "tools": [ + { + "vendor": "SCANOSS", + "name": "scanoss-py", + "version": "1.49.0" + } + ] + }, + "services": [ + { + "bom-ref": "scanoss-scanner", + "name": "SCANOSS Scanner", + "provider": { "name": "SCANOSS" } + } + ], + "components": [ + { + "type": "library", + "bom-ref": "pkg:npm/lodash@4.17.21", + "name": "lodash", + "version": "4.17.21", + "purl": "pkg:npm/lodash@4.17.21", + "licenses": [{ "id": "MIT" }] + }, + { + "type": "library", + "bom-ref": "pkg:npm/new-lib@2.0.0", + "name": "new-lib", + "version": "2.0.0", + "purl": "pkg:npm/new-lib@2.0.0", + "licenses": [{ "id": "MIT" }] + } + ], + "annotations": [ + { + "subjects": ["pkg:npm/lodash@4.17.21"], + "annotator": { "service": { "bom-ref": "scanoss-scanner" } }, + "timestamp": "2026-03-15T10:30:00Z", + "text": "Confirmed: lodash 4.17.21 vendored under src/lib" + }, + { + "subjects": ["pkg:npm/new-lib@2.0.0"], + "annotator": { "service": { "bom-ref": "scanoss-scanner" } }, + "timestamp": "2026-03-10T14:00:00Z", + "text": "Verified upstream project is the correct attribution" + } + ] +} +``` + +## SPDX 2.3 export + +Annotations are also **separate entries** that reference packages via their `SPDXID`. +The annotator is identified as a tool. 
+ +Reference: [SPDX 2.3 Annotations](https://spdx.github.io/spdx-spec/v2.3/annotations/) + +```json +{ + "spdxVersion": "SPDX-2.3", + "dataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "SCANOSS-SBOM", + "creationInfo": { + "created": "2026-03-23T12:00:00Z", + "creators": ["Tool: scanoss-py-1.49.0"] + }, + "documentNamespace": "https://spdx.org/spdxdocs/scanoss-py-1.49.0-abc123", + "documentDescribes": ["SPDXRef-a1b2c3", "SPDXRef-d4e5f6"], + "packages": [ + { + "name": "lodash", + "SPDXID": "SPDXRef-a1b2c3", + "versionInfo": "4.17.21", + "downloadLocation": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "licenseConcluded": "MIT", + "copyrightText": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": "pkg:npm/lodash@4.17.21" + } + ] + }, + { + "name": "new-lib", + "SPDXID": "SPDXRef-d4e5f6", + "versionInfo": "2.0.0", + "downloadLocation": "https://registry.npmjs.org/new-lib/-/new-lib-2.0.0.tgz", + "licenseConcluded": "MIT", + "copyrightText": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": "pkg:npm/new-lib@2.0.0" + } + ] + } + ], + "annotations": [ + { + "annotator": "Tool: scanoss-py-1.49.0", + "annotationDate": "2026-03-15T10:30:00Z", + "annotationType": "OTHER", + "SPDXID": "SPDXRef-a1b2c3", + "comment": "Confirmed: lodash 4.17.21 vendored under src/lib" + }, + { + "annotator": "Tool: scanoss-py-1.49.0", + "annotationDate": "2026-03-10T14:00:00Z", + "annotationType": "OTHER", + "SPDXID": "SPDXRef-d4e5f6", + "comment": "Verified upstream project is the correct attribution" + } + ] +} +``` \ No newline at end of file diff --git a/src/scanoss/cyclonedx.py b/src/scanoss/cyclonedx.py index e1012605..9794fb62 100644 --- a/src/scanoss/cyclonedx.py +++ b/src/scanoss/cyclonedx.py @@ -32,6 +32,7 @@ from cyclonedx.validation.json import JsonValidator from . 
import __version__ +from .scanoss_settings import find_best_match from .scanossbase import ScanossBase from .spdxlite import SpdxLite @@ -42,13 +43,14 @@ class CycloneDx(ScanossBase): Handle all interaction with CycloneDX formatting """ - def __init__(self, debug: bool = False, output_file: str = None): + def __init__(self, debug: bool = False, output_file: str = None, scanoss_settings=None): """ Initialise the CycloneDX class """ super().__init__(debug) self.output_file = output_file self.debug = debug + self.scanoss_settings = scanoss_settings self._spdx = SpdxLite(debug=debug) def parse(self, data: dict): # noqa: PLR0912, PLR0915 @@ -100,6 +102,7 @@ def parse(self, data: dict): # noqa: PLR0912, PLR0915 fdl.append({'id': name}) dc.append(name) fd['licenses'] = fdl + fd['_file_path'] = f cdx[purl] = fd else: purls = d.get('purl') @@ -158,6 +161,7 @@ def parse(self, data: dict): # noqa: PLR0912, PLR0915 continue fdl.append({'id': name}) fd['licenses'] = fdl + fd['_file_path'] = f cdx[purl] = fd # self.print_stderr(f'VD: {vdx}') # self.print_stderr(f'CDX: {cdx}') @@ -200,13 +204,12 @@ def produce_from_json(self, data: dict, output_file: str = None) -> tuple[bool, self.print_msg('Warning: Empty scan results - generating minimal CycloneDX SBOM with no components.') self._spdx.load_license_data() # Load SPDX license name data for later reference # - # Using CDX version 1.4: https://cyclonedx.org/docs/1.4/json/ + # Using CDX version 1.5: https://cyclonedx.org/docs/1.5/json/ # Validate using: https://github.com/CycloneDX/cyclonedx-cli - # cyclonedx-cli validate --input-format json --input-version v1_4 --fail-on-errors --input-file cdx.json # data = { 'bomFormat': 'CycloneDX', - 'specVersion': '1.4', + 'specVersion': '1.5', 'serialNumber': f'urn:uuid:{uuid.uuid4()}', 'version': 1, 'metadata': { @@ -255,6 +258,33 @@ def produce_from_json(self, data: dict, output_file: str = None) -> tuple[bool, c_data['cpe'] = cpe data['components'].append(c_data) # End for loop + # Build 
annotations from BOM rules via ScanossSettings + annotations = [] + if self.scanoss_settings: + all_entries = (self.scanoss_settings.get_bom_include() + + self.scanoss_settings.get_bom_replace()) + entries_with_ack = [e for e in all_entries if e.acknowledgement] + if entries_with_ack: + org = self.scanoss_settings.get_organization() + for purl in cdx: + comp = cdx.get(purl) + file_path = comp.get('_file_path', '') + match = find_best_match(file_path, [purl], entries_with_ack) + if match: + ts = match.timestamp + if not ts: + self.print_stderr( + f'Warning: No timestamp for annotation on {purl}, using current time' + ) + ts = data['metadata']['timestamp'] + annotations.append({ + 'subjects': [purl], + 'text': match.acknowledgement, + 'timestamp': ts, + 'annotator': {'organization': {'name': org}}, + }) + if annotations: + data['annotations'] = annotations if vdx: for vuln_id in vdx: vulns = vdx.get(vuln_id) diff --git a/src/scanoss/data/scanoss-settings-schema.json b/src/scanoss/data/scanoss-settings-schema.json index 85c551ab..d78632c9 100644 --- a/src/scanoss/data/scanoss-settings-schema.json +++ b/src/scanoss/data/scanoss-settings-schema.json @@ -18,6 +18,11 @@ "description": { "type": "string", "description": "Description of the project" + }, + "organization": { + "type": "string", + "description": "Name of the organization responsible for this project and its BOM decisions", + "examples": ["SCANOSS"] } } }, @@ -34,7 +39,7 @@ "properties": { "scanning": { "type": "array", - "description": "List of glob patterns to skip files from scanning", + "description": "List of glob patterns (gitignore format) to exclude files from scanning. 
Patterns are matched relative to the scan root directory.\n\n- A trailing `/` matches a directory\n- `*` matches anything except `/`\n- `**` matches zero or more directories\n- `[0-9]` matches character ranges\n- `?` matches a single character except `/`\n- Leading `!` negates a pattern", "items": { "type": "string", "examples": [ @@ -44,14 +49,17 @@ "path/to/file.c", "path/to/another/file.py", "**/*.ts", - "**/*.json" + "**/*.json", + "!important.log", + "temp/", + "debug[0-9]*.txt" ] }, "uniqueItems": true }, "fingerprinting": { "type": "array", - "description": "List of glob patterns to skip files from fingerprinting", + "description": "List of glob patterns (gitignore format) to exclude files from fingerprinting. Patterns are matched relative to the scan root directory.\n\n- A trailing `/` matches a directory\n- `*` matches anything except `/`\n- `**` matches zero or more directories\n- `[0-9]` matches character ranges\n- `?` matches a single character except `/`\n- Leading `!` negates a pattern", "items": { "type": "string", "examples": [ @@ -61,20 +69,10 @@ "path/to/file.c", "path/to/another/file.py", "**/*.ts", - "**/*.json" - ] - }, - "uniqueItems": true - }, - "dependencies": { - "type": "array", - "description": "List of glob patterns to skip dependency files from dependency analysis", - "items": { - "type": "string", - "examples": [ - "vendor/**", - "third_party/", - "node_modules/**" + "**/*.json", + "!important.log", + "temp/", + "debug[0-9]*.txt" ] }, "uniqueItems": true @@ -87,6 +85,7 @@ "properties": { "scanning": { "type": "array", + "description": "Rules for skipping files based on their size during scanning. 
Each rule combines glob patterns with min/max byte thresholds.", "items": { "type": "object", "properties": { @@ -115,10 +114,20 @@ "description": "Maximum size of the file in bytes" } } - } + }, + "examples": [ + [ + { + "patterns": ["*.log", "!important.log"], + "min": 512, + "max": 5242880 + } + ] + ] }, "fingerprinting": { "type": "array", + "description": "Rules for skipping files based on their size during fingerprinting. Each rule combines glob patterns with min/max byte thresholds.", "items": { "type": "object", "properties": { @@ -126,17 +135,17 @@ "type": "array", "description": "List of glob patterns to apply the min/max size rule", "items": { - "type": "string" - }, - "examples": [ - "path/to/folder", - "path/to/folder/**", - "path/to/folder/**/*", - "path/to/file.c", - "path/to/another/file.py", - "**/*.ts", - "**/*.json" - ] + "type": "string", + "examples": [ + "path/to/folder", + "path/to/folder/**", + "path/to/folder/**/*", + "path/to/file.c", + "path/to/another/file.py", + "**/*.ts", + "**/*.json" + ] + } }, "min": { "type": "integer", @@ -147,7 +156,16 @@ "description": "Maximum size of the file in bytes" } } - } + }, + "examples": [ + [ + { + "patterns": ["temp/", "*.tmp"], + "min": 512, + "max": 5242880 + } + ] + ] } } } @@ -177,7 +195,7 @@ }, "ignore_cert_errors": { "type": "boolean", - "description": "Whether to ignore certificate errors" + "description": "Whether to ignore TLS/SSL certificate errors. Intended for testing and development environments only; do not enable in production." } } }, @@ -188,7 +206,7 @@ }, "ranking_threshold": { "type": ["integer", "null"], - "description": "Ranking threshold value. A value of -1 defers to server configuration", + "description": "Ranking threshold for file snippet results (range 0\u201310). A value of `-1` defers to server configuration. 
Higher values require stronger matches.", "minimum": -1, "maximum": 10, "default": 0 @@ -207,7 +225,7 @@ }, "honour_file_exts": { "type": ["boolean", "null"], - "description": "Ignores file extensions. When not set, defers to server configuration.", + "description": "When `true`, the server considers file extensions during matching (e.g. a `.c` file only matches other `.c` files). When `false`, file extensions are ignored. When `null`, defers to server configuration.", "default": true }, "dependency_analysis": { @@ -216,27 +234,27 @@ }, "skip_headers": { "type": "boolean", - "description": "Skip license headers, comments and imports at the beginning of files", + "description": "Skip license headers, comments, and imports at the beginning of files before snippet matching. Works together with `skip_headers_limit` to control how many leading lines are stripped.", "default": false }, "skip_headers_limit": { "type": "integer", - "description": "Maximum number of lines to skip when filtering headers", + "description": "Maximum number of leading lines to strip when `skip_headers` is enabled. A value of `0` means no limit (strip all detected header lines).", "default": 0 } } }, "hpfm": { "type": "object", - "description": "HPFM (High Precision Folder Matching) configuration", + "description": "HPFM (High Precision Folder Matching) configuration. HPFM detects copied directory structures by comparing folder-level fingerprints.", "properties": { "ranking_enabled": { "type": "boolean", - "description": "Enable ranking for HPFM" + "description": "Enable ranking for HPFM results" }, "ranking_threshold": { "type": ["integer", "null"], - "description": "Ranking threshold value. A value of -1 defers to server configuration", + "description": "Ranking threshold for HPFM results (range 0\u201399). Note: this range differs from `file_snippet.ranking_threshold` (0\u201310). A value of `-1` defers to server configuration. 
Higher values require stronger matches.", "minimum": -1, "maximum": 99, "default": 0 @@ -255,18 +273,18 @@ "properties": { "include": { "type": "array", - "description": "Set of rules to be added as context when scanning. This list will be sent as payload to the API.", + "description": "Rules for adding components as scan context. Sent to the SCANOSS API to influence result matching. Requires purl; path is optional for partial matching.", "items": { "type": "object", "properties": { "path": { "type": "string", - "description": "File or folder path. Paths ending with '/' are treated as folder rules and match all files under that directory.", + "description": "File or folder path, relative to the scan root. Paths ending with `/` are treated as folder rules and match all files under that directory.", "examples": ["src/main.c", "src/vendor/"] }, "purl": { "type": "string", - "description": "Package URL to be used to match the component", + "description": "Package URL identifying the component. Format: `pkg:<type>/<namespace>/<name>@<version>`. 
Version is recommended but optional.", "examples": [ "pkg:npm/vue@2.6.12", "pkg:golang/github.com/golang/go@1.17.3" @@ -275,26 +293,51 @@ "comment": { "type": "string", "description": "Additional notes or comments" + }, + "acknowledgement": { + "type": "string", + "description": "Formal acknowledgement of this BOM decision, propagated to SBOM output" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "ISO 8601 UTC timestamp recording when this BOM decision was made", + "examples": ["2026-03-15T10:30:00Z"] } }, "uniqueItems": true, "required": ["purl"] - } + }, + "examples": [ + [ + { + "path": "src/lib/component.js", + "purl": "pkg:npm/lodash@4.17.21", + "comment": "Full match: path + purl", + "acknowledgement": "Confirmed: lodash 4.17.21 vendored under src/lib", + "timestamp": "2026-03-15T10:30:00Z" + }, + { + "purl": "pkg:npm/vue@2.6.12", + "comment": "Partial match: purl only" + } + ] + ] }, "remove": { "type": "array", - "description": "Set of rules that will remove files from the results file after the scan is completed.", + "description": "Rules for removing components from results after scanning (client-side post-processing). Supports full match (both path and purl) or partial match (path only or purl only).", "items": { "type": "object", "properties": { "path": { "type": "string", - "description": "File or folder path. Paths ending with '/' are treated as folder rules and match all files under that directory.", + "description": "File or folder path, relative to the scan root. Paths ending with `/` are treated as folder rules and match all files under that directory.", "examples": ["src/main.c", "src/vendor/"] }, "purl": { "type": "string", - "description": "Package URL", + "description": "Package URL identifying the component to remove. Format: `pkg:<type>/<namespace>/<name>@<version>`. 
Version is recommended but optional.", "examples": [ "pkg:npm/vue@2.6.12", "pkg:golang/github.com/golang/go@1.17.3" @@ -303,6 +346,16 @@ "comment": { "type": "string", "description": "Additional notes or comments" + }, + "acknowledgement": { + "type": "string", + "description": "Formal acknowledgement of this BOM decision, propagated to SBOM output" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "ISO 8601 UTC timestamp recording when this BOM decision was made", + "examples": ["2026-03-15T10:30:00Z"] } }, "anyOf": [ @@ -310,22 +363,41 @@ {"required": ["path"]} ], "uniqueItems": true - } + }, + "examples": [ + [ + { + "path": "src/main.c", + "purl": "pkg:npm/vue@2.6.12", + "comment": "Full match: removes only this purl at this path", + "acknowledgement": "Reviewed and confirmed as original code, not third-party", + "timestamp": "2026-03-15T10:30:00Z" + }, + { + "purl": "pkg:npm/deprecated-pkg@1.0.0", + "comment": "Partial match: removes this purl wherever it appears" + }, + { + "path": "src/vendor/", + "comment": "Partial match: removes all results under this folder" + } + ] + ] }, "replace": { "type": "array", - "description": "Set of rules that will replace components with the specified one after the scan is completed.", + "description": "Rules for replacing components in results after scanning (client-side post-processing). Requires both purl (original component) and replace_with (replacement). Path is optional for scoping.", "items": { "type": "object", "properties": { "path": { "type": "string", - "description": "File or folder path. Paths ending with '/' are treated as folder rules and match all files under that directory.", + "description": "File or folder path, relative to the scan root. 
Paths ending with `/` are treated as folder rules and match all files under that directory.", "examples": ["src/main.c", "src/vendor/"] }, "purl": { "type": "string", - "description": "Package URL to replace", + "description": "Package URL of the original component to replace. Format: `pkg:<type>/<namespace>/<name>@<version>`. Version is recommended but optional.", "examples": [ "pkg:npm/vue@2.6.12", "pkg:golang/github.com/golang/go@1.17.3" @@ -335,23 +407,46 @@ "type": "string", "description": "Additional notes or comments" }, + "acknowledgement": { + "type": "string", + "description": "Formal acknowledgement of this BOM decision, propagated to SBOM output" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "ISO 8601 UTC timestamp recording when this BOM decision was made", + "examples": ["2026-03-15T10:30:00Z"] + }, "license": { "type": "string", - "description": "License of the component. Should be a valid SPDX license expression", + "description": "License of the replacement component. Should be a valid SPDX license expression (e.g. `MIT`, `Apache-2.0`, `GPL-3.0-only`).", "examples": ["MIT", "Apache-2.0"] }, "replace_with": { "type": "string", - "description": "Package URL to replace with", + "description": "The replacement Package URL. 
Format: `pkg:<type>/<namespace>/<name>@<version>`.", "examples": [ - "pkg:npm/vue@2.6.12", - "pkg:golang/github.com/golang/go@1.17.3" + "pkg:npm/vue@2.6.14", + "pkg:npm/new-lib@2.0.0" ] } }, "uniqueItems": true, "required": ["purl", "replace_with"] - } + }, + "examples": [ + [ + { + "path": "src/utils/helper.js", + "purl": "pkg:npm/old-lib@1.0.0", + "replace_with": "pkg:npm/new-lib@2.0.0", + "license": "MIT", + "comment": "Upgrade to newer version", + "acknowledgement": "Verified upstream project is the correct attribution", + "timestamp": "2026-03-15T10:30:00Z" + } + ] + ] } } } diff --git a/src/scanoss/scanner.py b/src/scanoss/scanner.py index 1467b328..a218d457 100644 --- a/src/scanoss/scanner.py +++ b/src/scanoss/scanner.py @@ -612,10 +612,10 @@ def __finish_scan_threaded(self, file_map: Optional[Dict[Any, Any]] = None) -> b if self.output_format == 'plain': self.__log_result(json.dumps(results, indent=2, sort_keys=True)) elif self.output_format == 'cyclonedx': - cdx = CycloneDx(self.debug, self.scan_output) + cdx = CycloneDx(self.debug, self.scan_output, scanoss_settings=self.scanoss_settings) success, _ = cdx.produce_from_json(results) elif self.output_format == 'spdxlite': - spdxlite = SpdxLite(self.debug, self.scan_output) + spdxlite = SpdxLite(self.debug, self.scan_output, scanoss_settings=self.scanoss_settings) success = spdxlite.produce_from_json(results) elif self.output_format == 'csv': csvo = CsvOutput(self.debug, self.scan_output) @@ -1050,10 +1050,10 @@ def scan_wfp(self, wfp: str) -> bool: if self.output_format == 'plain': self.__log_result(raw_output) elif self.output_format == 'cyclonedx': - cdx = CycloneDx(self.debug, self.scan_output) + cdx = CycloneDx(self.debug, self.scan_output, scanoss_settings=self.scanoss_settings) cdx.produce_from_str(raw_output) elif self.output_format == 'spdxlite': - spdxlite = SpdxLite(self.debug, self.scan_output) + spdxlite = SpdxLite(self.debug, self.scan_output, scanoss_settings=self.scanoss_settings) success = 
spdxlite.produce_from_str(raw_output) elif self.output_format == 'csv': csvo = CsvOutput(self.debug, self.scan_output) diff --git a/src/scanoss/scanoss_settings.py b/src/scanoss/scanoss_settings.py index 7a39fbaa..18b1d61d 100644 --- a/src/scanoss/scanoss_settings.py +++ b/src/scanoss/scanoss_settings.py @@ -45,6 +45,8 @@ class BomEntry: purl: Optional[str] = None path: Optional[str] = None comment: Optional[str] = None + acknowledgement: Optional[str] = None + timestamp: Optional[str] = None @classmethod def from_dict(cls, data: dict) -> 'BomEntry': @@ -54,6 +56,8 @@ def from_dict(cls, data: dict) -> 'BomEntry': purl=data.get('purl'), path=path, comment=data.get('comment'), + acknowledgement=data.get('acknowledgement'), + timestamp=data.get('timestamp'), ) def matches_path(self, result_path: str) -> bool: @@ -109,6 +113,8 @@ def from_dict(cls, data: dict) -> 'ReplaceRule': purl=data.get('purl'), path=path, comment=data.get('comment'), + acknowledgement=data.get('acknowledgement'), + timestamp=data.get('timestamp'), replace_with=data.get('replace_with'), license=data.get('license'), ) @@ -318,6 +324,10 @@ def _get_bom(self): return [] return self.data.get('bom', {}) + def get_organization(self) -> str: + """Get the organization name from self section. Returns 'unspecified' if not set.""" + return self.data.get('self', {}).get('organization') or 'unspecified' + def get_bom_include(self) -> List[BomEntry]: """ Get the list of components to include in the scan diff --git a/src/scanoss/spdxlite.py b/src/scanoss/spdxlite.py index 3e13af89..7c609deb 100644 --- a/src/scanoss/spdxlite.py +++ b/src/scanoss/spdxlite.py @@ -34,6 +34,7 @@ from packageurl import PackageURL from . 
import __version__ +from .scanoss_settings import find_best_match class SpdxLite: @@ -42,12 +43,13 @@ class SpdxLite: Handle all interaction with SPDX Lite formatting """ - def __init__(self, debug: bool = False, output_file: str = None): + def __init__(self, debug: bool = False, output_file: str = None, scanoss_settings=None): """ Initialise the SpdxLite class """ self.output_file = output_file self.debug = debug + self.scanoss_settings = scanoss_settings self._spdx_licenses = {} # Used to lookup for valid SPDX license identifiers self._spdx_lic_names = {} # Used to look for SPDX license identifiers by name @@ -136,7 +138,9 @@ def _process_dependency_entry(self, file_path: str, entry: dict, summary: dict): if not self._is_valid_purl(file_path, dep, purl, summary): continue # Modifying the summary dictionary directly as it's passed by reference - summary[purl] = self._create_dependency_summary(dep) + dep_summary = self._create_dependency_summary(dep) + dep_summary['_file_path'] = file_path + summary[purl] = dep_summary def _process_file_entry(self, file_path: str, entry: dict, summary: dict): """ @@ -156,7 +160,9 @@ def _process_file_entry(self, file_path: str, entry: dict, summary: dict): if not self._is_valid_purl(file_path, entry, purl, summary): return - summary[purl] = self._create_file_summary(entry) + file_summary = self._create_file_summary(entry) + file_summary['_file_path'] = file_path + summary[purl] = file_summary def _is_valid_purl(self, file_path: str, entry: dict, purl: str, summary: dict) -> bool: """ @@ -291,6 +297,7 @@ def produce_from_json(self, data: json, output_file: str = None) -> bool: self.load_license_data() spdx_document = self._create_base_document(raw_data) self._process_packages(raw_data, spdx_document) + self._build_annotations(raw_data, spdx_document) return self._write_output(spdx_document, output_file) def _create_base_document(self, raw_data: dict) -> dict: @@ -390,6 +397,36 @@ def _process_packages(self, raw_data: dict, 
spdx_document: dict): self._process_license_refs(lic_refs, spdx_document) + def _build_annotations(self, raw_data: dict, spdx_document: dict): + """Build SPDX annotations from BOM rules via ScanossSettings.""" + if not self.scanoss_settings: + return + all_entries = (self.scanoss_settings.get_bom_include() + + self.scanoss_settings.get_bom_replace()) + entries_with_ack = [e for e in all_entries if e.acknowledgement] + if not entries_with_ack: + return + annotations = [] + org = self.scanoss_settings.get_organization() + for purl, comp in raw_data.items(): + file_path = comp.get('_file_path', '') + match = find_best_match(file_path, [purl], entries_with_ack) + if match: + ts = match.timestamp + if not ts: + self.print_stderr( + f'Warning: No timestamp for annotation on {purl}, using current time' + ) + ts = spdx_document['creationInfo']['created'] + annotations.append({ + 'annotationDate': ts, + 'annotationType': 'REVIEW', + 'annotator': f'Organization: {org}', + 'comment': match.acknowledgement, + }) + if annotations: + spdx_document['annotations'] = annotations + def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict: """ Create package information for SPDX document. 
@@ -426,7 +463,7 @@ def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict: purl_ver = f'{purl}@{comp_ver}' purl_hash = hashlib.md5(purl_ver.encode('utf-8')).hexdigest() - return { + package_info = { 'name': comp.get('component'), 'SPDXID': f'SPDXRef-{purl_hash}', 'versionInfo': comp_ver, @@ -451,6 +488,7 @@ def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict: } ], } + return package_info def _process_package_licenses(self, licenses: list, lic_refs: set) -> str: """ diff --git a/tests/test_acknowledgement_output.py b/tests/test_acknowledgement_output.py new file mode 100644 index 00000000..01b343f6 --- /dev/null +++ b/tests/test_acknowledgement_output.py @@ -0,0 +1,273 @@ +""" +SPDX-License-Identifier: MIT + + Copyright (c) 2025, SCANOSS + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
def _make_scan_result(purl, component='test-comp', version='1.0.0'):
    """
    Return a minimal scan result holding one file match for 'test/file.c'.

    The entry carries no acknowledgement; in these tests that information is
    supplied separately through the BOM rules of the settings object.
    """
    return {
        'test/file.c': [
            {
                'id': 'file',
                'purl': [purl],
                'component': component,
                'version': version,
                'vendor': 'test-vendor',
                'url': f'https://github.com/test/{component}',
                'licenses': [{'name': 'MIT', 'source': 'component_declared'}],
            }
        ]
    }


def _make_settings(bom_include=None, bom_replace=None, organization='SCANOSS'):
    """
    Return a MagicMock standing in for ScanossSettings.

    Only the three getters used by the annotation tests are configured:
    get_bom_include, get_bom_replace and get_organization.
    """
    return MagicMock(
        **{
            'get_bom_include.return_value': bom_include or [],
            'get_bom_replace.return_value': bom_replace or [],
            'get_organization.return_value': organization,
        }
    )
class TestCycloneDxAnnotations(unittest.TestCase):
    """Test annotation building from BOM rules in CycloneDX output"""

    # Purl and rule timestamp shared by most scenarios below
    PURL = 'pkg:npm/test@1.0.0'
    RULE_TIMESTAMP = '2026-03-15T10:30:00Z'

    def _export(self, data, settings=None):
        """Run the CycloneDX export, assert it succeeded and return the produced document."""
        exporter = CycloneDx() if settings is None else CycloneDx(scanoss_settings=settings)
        success, output = exporter.produce_from_json(data)
        self.assertTrue(success)
        return output

    def test_cdx_annotation_from_bom_include(self):
        """CycloneDX should build annotation from matching BOM include rule"""
        rule = BomEntry(purl=self.PURL, acknowledgement='confirmed', timestamp=self.RULE_TIMESTAMP)
        output = self._export(_make_scan_result(self.PURL), _make_settings(bom_include=[rule]))
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        annotation = annotations[0]
        self.assertEqual(annotation['text'], 'confirmed')
        self.assertEqual(annotation['timestamp'], '2026-03-15T10:30:00Z')
        self.assertEqual(annotation['subjects'], ['pkg:npm/test@1.0.0'])
        self.assertEqual(annotation['annotator']['organization']['name'], 'SCANOSS')

    def test_cdx_no_annotations_without_settings(self):
        """CycloneDX should not include annotations when no settings provided"""
        output = self._export(_make_scan_result(self.PURL))
        self.assertNotIn('annotations', output)

    def test_cdx_no_annotations_when_no_matching_rule(self):
        """CycloneDX should not annotate components without matching BOM rules"""
        unrelated_rule = BomEntry(purl='pkg:npm/other@2.0.0', acknowledgement='confirmed')
        output = self._export(
            _make_scan_result(self.PURL),
            _make_settings(bom_include=[unrelated_rule]),
        )
        self.assertNotIn('annotations', output)

    def test_cdx_timestamp_fallback_to_current_time(self):
        """CycloneDX annotation should fall back to metadata timestamp when rule has no timestamp"""
        undated_rule = BomEntry(purl=self.PURL, acknowledgement='confirmed')
        output = self._export(
            _make_scan_result(self.PURL),
            _make_settings(bom_include=[undated_rule]),
        )
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        self.assertEqual(annotations[0]['timestamp'], output['metadata']['timestamp'])

    def test_cdx_organization_from_settings(self):
        """CycloneDX annotation should use organization from settings"""
        settings = _make_settings(
            bom_include=[BomEntry(purl=self.PURL, acknowledgement='ack')],
            organization='MyOrg',
        )
        output = self._export(_make_scan_result(self.PURL), settings)
        annotations = output.get('annotations', [])
        self.assertEqual(annotations[0]['annotator']['organization']['name'], 'MyOrg')

    def test_cdx_no_component_properties(self):
        """CycloneDX components should NOT have properties for acknowledgement"""
        rule = BomEntry(purl=self.PURL, acknowledgement='confirmed')
        output = self._export(_make_scan_result(self.PURL), _make_settings(bom_include=[rule]))
        components = output.get('components', [])
        self.assertEqual(len(components), 1)
        self.assertNotIn('properties', components[0])

    def test_cdx_dependency_annotation(self):
        """CycloneDX should build annotation for dependency entries"""
        dependency = {
            'purl': 'pkg:npm/dep@2.0.0',
            'component': 'dep',
            'version': '2.0.0',
            'licenses': [{'name': 'Apache-2.0'}],
        }
        data = {'test/package.json': [{'id': 'dependency', 'dependencies': [dependency]}]}
        rule = BomEntry(
            purl='pkg:npm/dep@2.0.0',
            acknowledgement='noticed',
            timestamp=self.RULE_TIMESTAMP,
        )
        output = self._export(data, _make_settings(bom_include=[rule]))
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        self.assertEqual(annotations[0]['text'], 'noticed')
        self.assertEqual(annotations[0]['subjects'], ['pkg:npm/dep@2.0.0'])

    def test_cdx_replace_rule_annotation(self):
        """CycloneDX should build annotation from matching BOM replace rule"""
        rule = BomEntry(
            purl=self.PURL,
            acknowledgement='replaced and verified',
            timestamp=self.RULE_TIMESTAMP,
        )
        output = self._export(_make_scan_result(self.PURL), _make_settings(bom_replace=[rule]))
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        self.assertEqual(annotations[0]['text'], 'replaced and verified')
class TestSpdxLiteAnnotations(unittest.TestCase):
    """Test annotation building from BOM rules in SPDX output"""

    def _produce_spdx(self, data, settings=None):
        """
        Produce SPDX output for ``data`` and return it as parsed JSON.

        The document is written into a private temporary directory, so runs cannot
        collide on a shared file name and the file is removed however the test ends.
        """
        spdx = SpdxLite(scanoss_settings=settings)
        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = os.path.join(temp_dir, 'spdx.json')
            success = spdx.produce_from_json(data, output_file)
            self.assertTrue(success)
            with open(output_file, 'r', encoding='utf-8') as f:
                return json.load(f)

    def test_spdx_annotation_from_bom_include(self):
        """SPDX should build annotation from matching BOM include rule"""
        data = _make_scan_result('pkg:npm/test@1.0.0')
        settings = _make_settings(bom_include=[
            BomEntry(purl='pkg:npm/test@1.0.0', acknowledgement='confirmed',
                     timestamp='2026-03-15T10:30:00Z'),
        ])
        output = self._produce_spdx(data, settings)
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        self.assertEqual(annotations[0]['comment'], 'confirmed')
        self.assertEqual(annotations[0]['annotationDate'], '2026-03-15T10:30:00Z')
        self.assertEqual(annotations[0]['annotationType'], 'REVIEW')
        self.assertEqual(annotations[0]['annotator'], 'Organization: SCANOSS')

    def test_spdx_no_annotations_without_settings(self):
        """SPDX should not include annotations when no settings provided"""
        data = _make_scan_result('pkg:npm/test@1.0.0')
        output = self._produce_spdx(data)
        self.assertNotIn('annotations', output)

    def test_spdx_timestamp_fallback(self):
        """SPDX annotation should fall back to creation date when rule has no timestamp"""
        data = _make_scan_result('pkg:npm/test@1.0.0')
        settings = _make_settings(bom_include=[
            BomEntry(purl='pkg:npm/test@1.0.0', acknowledgement='confirmed'),
        ])
        output = self._produce_spdx(data, settings)
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        self.assertEqual(annotations[0]['annotationDate'], output['creationInfo']['created'])

    def test_spdx_organization_from_settings(self):
        """SPDX annotation should use organization from settings"""
        data = _make_scan_result('pkg:npm/test@1.0.0')
        settings = _make_settings(
            bom_include=[BomEntry(purl='pkg:npm/test@1.0.0', acknowledgement='ack')],
            organization='MyOrg',
        )
        output = self._produce_spdx(data, settings)
        annotations = output.get('annotations', [])
        self.assertEqual(annotations[0]['annotator'], 'Organization: MyOrg')

    def test_spdx_no_package_comment(self):
        """SPDX packages should NOT have comment for acknowledgement"""
        data = _make_scan_result('pkg:npm/test@1.0.0')
        settings = _make_settings(bom_include=[
            BomEntry(purl='pkg:npm/test@1.0.0', acknowledgement='confirmed'),
        ])
        output = self._produce_spdx(data, settings)
        packages = output.get('packages', [])
        self.assertEqual(len(packages), 1)
        self.assertNotIn('comment', packages[0])

    def test_spdx_dependency_annotation(self):
        """SPDX should build annotation for dependency entries"""
        data = {
            'test/package.json': [{
                'id': 'dependency',
                'dependencies': [{
                    'purl': 'pkg:npm/dep@2.0.0',
                    'component': 'dep',
                    'version': '2.0.0',
                    'url': 'https://github.com/test/dep',
                    'licenses': [{'name': 'MIT'}],
                }],
            }],
        }
        settings = _make_settings(bom_include=[
            BomEntry(purl='pkg:npm/dep@2.0.0', acknowledgement='noticed',
                     timestamp='2026-03-15T10:30:00Z'),
        ])
        output = self._produce_spdx(data, settings)
        annotations = output.get('annotations', [])
        self.assertEqual(len(annotations), 1)
        self.assertEqual(annotations[0]['comment'], 'noticed')
class TestAcknowledgementParsing(unittest.TestCase):
    """Unit tests for acknowledgement field parsing in BomEntry and ReplaceRule"""

    def test_bom_entry_parses_acknowledgement(self):
        """An 'acknowledgement' key in the input dict is exposed on the BomEntry."""
        raw = {'purl': 'pkg:npm/vue', 'acknowledgement': 'noticed'}
        parsed = BomEntry.from_dict(raw)
        self.assertEqual(parsed.acknowledgement, 'noticed')

    def test_bom_entry_acknowledgement_none_when_absent(self):
        """Without an 'acknowledgement' key the BomEntry attribute is None."""
        parsed = BomEntry.from_dict({'purl': 'pkg:npm/vue'})
        self.assertIsNone(parsed.acknowledgement)

    def test_replace_rule_parses_acknowledgement(self):
        """An 'acknowledgement' key in the input dict is exposed on the ReplaceRule."""
        raw = {
            'purl': 'pkg:npm/old',
            'replace_with': 'pkg:npm/new',
            'acknowledgement': 'acknowledged',
        }
        parsed = ReplaceRule.from_dict(raw)
        self.assertEqual(parsed.acknowledgement, 'acknowledged')

    def test_replace_rule_acknowledgement_none_when_absent(self):
        """Without an 'acknowledgement' key the ReplaceRule attribute is None."""
        raw = {'purl': 'pkg:npm/old', 'replace_with': 'pkg:npm/new'}
        parsed = ReplaceRule.from_dict(raw)
        self.assertIsNone(parsed.acknowledgement)