diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml new file mode 100644 index 0000000..f3b3691 --- /dev/null +++ b/.github/workflows/pull-request.yml @@ -0,0 +1,47 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +# Pipeline that checks branches that have been pushed to "Main" OR +# are the source branch in a newly created pull request into "Main" +# Fails the test if there are Python syntax errors or undefined names OR pytest fails + +name: dicom-check Pytest Validation + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run unit tests for `test.py` script + run: | + python -m pytest tests/test_data_download.py + - name: Run unit tests for `preprocess.py` script + run: | + python -m pytest tests/test_preprocess.py + - name: Run unit tests for `match.py` script + run: | + python -m pytest tests/test_match.py + - name: Run unit tests for `check.py` script + run: | + python -m pytest tests/test_check.py + - name: Run unit tests for `run.py` script + run: | + python -m pytest tests/test_run.py diff --git a/.gitignore b/.gitignore index e8fb319..6de38a6 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,4 @@ pyrightconfig.json testdata/ font/ +.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 0734e13..39b8a93 100644 --- a/README.md +++ b/README.md @@ -73,3 +73,8 @@ python run.py -t templates/generic-rt.json -r pdf testdata/HNSCC ``` Check the `testdata/HNSCC/check_results.csv` for a summary of all checks performed. + + +# + +https://github.com/icometrix/dicom2nifti/issues/148 \ No newline at end of file diff --git a/preprocess.py b/preprocess.py index 294ba06..195ee00 100644 --- a/preprocess.py +++ b/preprocess.py @@ -352,14 +352,14 @@ def preprocess( type=str, choices=["pdf", "html"], help="The format of the report to generate (pdf or html). If not provided, " - "no report is generated.", + "no report is generated.", ) parser.add_argument( "-o", "--output_directory", type=Path, help="The path to the directory to save the report to. " - "If not provided, the input directory will be used." + "If not provided, the input directory will be used.", ) args = parser.parse_args() diff --git a/requirements.txt b/requirements.txt index 2ecfd43..ba1b561 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,6 @@ pandas>=2.2.0 pydicom>=2.4.4 tqdm>=4.66.1 fpdf2>=2.8.2 -requests>=2.27.1 \ No newline at end of file +requests>=2.27.1 +networkx==3.0 +pytest diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..bb25b4b --- /dev/null +++ b/tests/README.md @@ -0,0 +1,68 @@ +# `dicom-check tests` + +This folder contains unit and integration tests that are run as part of GitHub Actions, but can also be run locally in the following order, corresponding to the sequential pipeline steps: +``` +# Step 1: Download test data +python -m pytest tests/test_data_download.py + +# Step 2: Preprocess DICOM files +python -m pytest tests/test_preprocess.py + +# Step 3: Match series to template +python -m pytest tests/test_match.py + +# Step 4: Perform a series of checks +python -m pytest tests/test_check.py + +# Step 5: Run the full workflow on all subdirectories +python -m pytest tests/test_run.py +``` + +## Test summary table +Each table below summarises the test scripts, their assigned IDs, purpose, and the key functions they utilise. + + +### `test_data_download.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | ------- | +| ID001 | `test_download_file_successful` | Tests successful file download | download_file | +| ID002 | `test_download_file_unsuccessful` | Tests hash mismatch handling during download | download_file | +| ID003 | `test_download_test_data` | Tests full test data download and extraction process | download_test_data | + + +### `test_preprocess.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | ------- | +| ID004 | `test_load_template` | Validates loading of the template file and its structure | load_template | +| ID005| `test_index_dicom_file_value_error` | Tests error handling for incorrect input type | index_dicom_files | +| ID006 | `test_index_dicom_file_successful` | Tests correct indexing of DICOM files and DataFrame structure | load_template, index_dicom_files | +| ID009 | `test_generate_series_json_unsuccessful` | Validates detection of data inconsistencies during JSON generation | load_template, scan_file | +| ID010 | `test_generate_series_json_successful` | Tests successful creation of series JSON from scanned DICOM data | load_template, scan_file | +| ID011 | `test_generate_series_report_unsuccessful` | Tests error raised for unsupported report format | generate_series_report | +| ID012 | `test_download_font_pack_successful` | Checks whether font pack downloads and unzips correctly | download_font_pack | +| ID013 | `test_preprocess` | Tests the whole preprocessing step including file generation | download_font_pack | + +**Note:** IDs ID007 and ID008 are not included in the table, until Phil advises me on the scan_file question I have. + +### `test_match.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | ------- | +| ID014 | `test_match_series_to_template` | Validates series matching based on template, and checks match count | match_series_to_template | + + +### `test_check.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | -------- | +| ID015 | `test_find_matched_series` | Validates matching logic for different checks and expected match counts | find_matched_series | +| ID016 | `test_perform_checks` | Compares number of passed/failed checks with expected outcome | perform_checks | + + +### `test_run.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | ------- | -------- | +| ID017 | test_run_on_all_subdirectories | Executes full pipeline on all subdirectories and validates the final summary table | run_on_all_subdirectories | diff --git a/tests/test_check.py b/tests/test_check.py new file mode 100644 index 0000000..08b5ae9 --- /dev/null +++ b/tests/test_check.py @@ -0,0 +1,69 @@ +import json +from pathlib import Path + +import pytest + +from check import perform_checks, find_matched_series + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC/HNSCC-01-0176") + + +@pytest.fixture +def series_file(dicom_path): + return dicom_path / "series.json" + + +@pytest.fixture +def template_dict(): + return Path("templates/generic-rt.json") + + +NUM_PASS = 7 +NUM_FAIL = 6 + +# For each scenario there are a number of expected outputs +cases_per_series = [ + {"name": "Planning CT", "expected_matches": 2}, + {"name": "RT Structure Set", "expected_matches": 2}, + {"name": "RT Plan", "expected_matches": 2}, + {"name": "RT Dose", "expected_matches": 2}, + {"name": ["Planning CT", "RT Structure Set"], "expected_matches": 4}, + {"name": ["RT Structure Set", "RT Plan"], "expected_matches": 4}, + {"name": ["RT Plan", "RT Dose"], "expected_matches": 4}, + { + "name": ["Planning CT", "RT Structure Set", "RT Plan", "RT Dose"], + "expected_matches": 8, + }, +] + + +# ID015 +@pytest.mark.parametrize( + "case", cases_per_series, ids=[str(c["name"]) for c in cases_per_series] +) +def test_find_matched_series(case, series_file): + + with open(series_file, "r", encoding="utf-8") as f: + series_json = json.load(f) + + result = find_matched_series(series_json=series_json, name=case["name"]) + assert len(result) == case["expected_matches"], f"Failed for: {case['name']}" + + +# ID016 +def test_perform_checks(dicom_path, series_file, template_dict): + + perform_checks(directory=dicom_path, template=template_dict, report_format="pdf") + + with open(series_file, "r", encoding="utf-8") as f: + series_json = json.load(f) + + all_checks = series_json["checks"] + passed = [check for check in all_checks if check["passed"]] + failed = [check for check in all_checks if not check["passed"]] + + assert len(passed) == NUM_PASS + assert len(failed) == NUM_FAIL diff --git a/tests/test_data_download.py b/tests/test_data_download.py new file mode 100644 index 0000000..917cbef --- /dev/null +++ b/tests/test_data_download.py @@ -0,0 +1,55 @@ +import os +from pathlib import Path + +import pytest + +from utils import download_file, download_test_data + + +@pytest.fixture +def zip_file_path(): + return Path("./testdata/HNSCC.zip") + + +@pytest.fixture +def data_path(): + return Path("./testdata") + + +# ID001: Test for downloading a file. +def test_download_file_successful(data_path): + data_path.mkdir(parents=True, exist_ok=True) + + zip_path = data_path / "HNSCC.zip" + download_file( + "https://zenodo.org/record/5276878/files/HNSCC.zip", + "6332d59406978a92f57d15da84f2e143", + zip_path, + ) + + assert zip_path.exists() + + +# ID002: Test for hash mismatch +def test_download_file_unsuccessful(data_path): + with pytest.raises(ValueError) as exc_info: + download_file( + "https://zenodo.org/record/5276878/files/HNSCC.zip", + "6332d59406978a88f57d15da84f2e143", + data_path.joinpath("HNSCC.zip"), + ) + assert "Hash mismatch" in str(exc_info.value) + + +# ID003: Download test data - full process +def test_download_test_data(zip_file_path, data_path): + download_test_data(data_path) + + # Assert ZIP file was deleted + assert not zip_file_path.exists() + + # Assert expected directories exist + extracted_path = data_path / "HNSCC" + + for name in ["HNSCC-01-0019", "HNSCC-01-0176", "HNSCC-01-0199"]: + assert extracted_path.joinpath(name).is_dir() diff --git a/tests/test_match.py b/tests/test_match.py new file mode 100644 index 0000000..8d9256b --- /dev/null +++ b/tests/test_match.py @@ -0,0 +1,40 @@ +import json +from pathlib import Path + +import pytest + +from match import match_series_to_template + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC/HNSCC-01-0176") + + +@pytest.fixture +def series_file(dicom_path): + return dicom_path / "series.json" + + +@pytest.fixture +def template_dict(): + return Path("templates/generic-rt.json") + + +EXPECTED_MATCHES = 8 + + +# ID014 +def test_match_series_to_template(dicom_path, series_file, template_dict): + + match_series_to_template( + directory=dicom_path, template=template_dict, report_format="pdf" + ) + + # Count the number of matches + with open(series_file, "r", encoding="utf-8") as f: + series_json = json.load(f) + + matches = [elem for elem in series_json["series"] if "match" in elem.keys()] + + assert len(matches) == EXPECTED_MATCHES diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py new file mode 100644 index 0000000..ad07f44 --- /dev/null +++ b/tests/test_preprocess.py @@ -0,0 +1,221 @@ +from pathlib import Path + +import pandas as pd +import pytest + +from preprocess import ( + generate_series_json, + generate_series_report, + index_dicom_files, + load_template, + preprocess, + scan_file, +) + +from utils import download_font_pack + + +@pytest.fixture +def expected_template_keys(): + return ["project", "description", "version", "meta", "expected_series", "checks"] + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC/HNSCC-01-0176") + + +@pytest.fixture +def dicom_file(dicom_path): + return dicom_path / "03-01-2009-NA-NA-44207" / "10.000000-NA-34598" / "135-001.dcm" + + +@pytest.fixture +def template_file(): + return Path("templates/generic-rt.json") + + +expected_df_cols = [ + "patient_id", + "study_uid", + "series_uid", + "modality", + "sop_class_uid", + "sop_instance_uid", + "for_uid", + "file_path", + "date_time", + "slice_location", + "referenced_uid", + "referenced_for_uid", + "SeriesDescription", + "StudyDescription", + "StudyInstanceUID", + "DoseSummationType", +] + + +# ID004 +def test_load_template(template_file, expected_template_keys): + + template_dict = load_template(template_file) + assert expected_template_keys == list(template_dict.keys()) + + +# ID005 +def test_index_dicom_file_value_error(): + + # Provide incorrect argument type + with pytest.raises(ValueError) as exc_info: + index_dicom_files(1) + + assert "input_directory must be of type pathlib.Path or str" in str(exc_info.value) + + +# ID006 +def test_index_dicom_file_successful(template_file, dicom_path): + + # Load the files from example DICOM dir. + template = load_template(template_file) + meta = template.get("meta", []) + dicom_df = index_dicom_files(dicom_path, meta=meta) + + assert dicom_df.shape == (998, 16) + assert list(dicom_df.columns) == expected_df_cols + + +# # ID007 - Not in the README +# def test_scan_file_unsuccessful(template_file): + +# # Provide a non-DICOM file +# template = load_template(template_file) +# meta = template.get("meta", []) +# result, status, _ = scan_file(template, meta=meta) + +# assert result is None +# assert status == "error" + +# # Provide a random DICOM file +# f = r"testdata/HNSCC/HNSCC-01-0019/07-04-1998-NA-RT\ SIMULATION-48452/1.000000-NA-10361/1-1.dcm" +# result, status, _ = scan_file(f, meta=meta) + +# assert result is None +# assert status == "error" + + +# # ID008 - Not in the README +# def test_scan_file_successful(template_file, dicom_file): + +# # Load in the meta tags +# template = load_template(template_file) +# meta = template.get("meta", []) + +# # Provide a DICOM file that can be indexed +# result, status, _ = scan_file(dicom_file, meta=meta) + +# assert result is not None +# assert status == "ok" +# assert isinstance(result, dict) +# assert result["patient_id"] == "HNSCC-01-0176" + + +# ID009 +def test_generate_series_json_unsuccessful(template_file, dicom_file): + + # Load in the meta tags + template = load_template(template_file) + meta = template.get("meta", []) + + # Provide a DICOM file that can be indexed + dicom_dict1, _, _ = scan_file(dicom_file, meta=meta) + dicom_dict2, _, _ = scan_file(dicom_file, meta=meta) + + assert dicom_dict1 is not None + assert dicom_dict2 is not None + + dicom_dict2_copy = dicom_dict2 + + # Go through the 'cases' that will raise a ValueError + cases = { + "modality": " has multiple modalities", + "for_uid": " has multiple frame of references", + "referenced_uid": " has multiple referenced series", + } + + for case, message in cases.items(): + + dicom_dict2_copy[case] = "something_random" + + dicom_df = pd.DataFrame( + [dicom_dict1, dicom_dict2_copy], columns=expected_df_cols + ) + + with pytest.raises(ValueError) as exc_info: + generate_series_json(df=dicom_df, meta=meta) + + assert f"Series {list(dicom_df['series_id'])[0]}" + message in str( + exc_info.value + ) + + +# ID010 +def test_generate_series_json_successful(template_file, dicom_file): + + # Load in the meta tags + template = load_template(template_file) + meta = template.get("meta", []) + + # Provide a DICOM file that can be indexed + dicom_dict1, _, _ = scan_file(dicom_file, meta=meta) + dicom_dict2, _, _ = scan_file(dicom_file, meta=meta) + + assert dicom_dict1 is not None + assert dicom_dict2 is not None + + dicom_df = pd.DataFrame([dicom_dict1, dicom_dict2], columns=expected_df_cols) + series_json = generate_series_json(df=dicom_df, meta=meta) + + assert isinstance(series_json, dict) + + +# ID011 +def test_generate_series_report_unsuccessful(): + + # Provide invalid report format + incorrect_report_format = "py" + with pytest.raises(ValueError) as exc_info: + generate_series_report( + series_json={}, output_directory=".", report_format=incorrect_report_format + ) + + assert f"Unsupported format {incorrect_report_format}" in str(exc_info.value) + + +# ID012 +def test_download_font_pack_successful(): + + download_font_pack(Path(".")) + + zip_file_path = Path("./font_pack.zip") + + # Assert ZIP file was deleted + assert not zip_file_path.exists() + + +# ID013 +def test_preprocess(dicom_path, template_file): + + # Run the whole preprocess task + preprocess(input_directory=dicom_path, template=template_file, report_format="pdf") + + # Check that the indexed.csv is created + indexed_file = dicom_path.joinpath("indexed.csv") + assert indexed_file.exists() + + # Check that the series.json is created + series_file = dicom_path.joinpath("series.json") + assert series_file.exists() + + # Check that the pdf report is present + series_report = dicom_path.joinpath("series_report.pdf") + assert series_report.exists() diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..b5df07f --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,83 @@ +from pathlib import Path + +import pandas as pd +import pytest + +from run import run_on_all_subdirectories + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC") + + +@pytest.fixture +def template_dict(): + return "templates/generic-rt.json" + + +def test_run_on_all_subdirectories( + dicom_path, + template_dict, +): + + # Run the processing on all subdirectories + run_on_all_subdirectories( + directory=dicom_path, template=template_dict, report_format="pdf" + ) + + result_df = pd.read_csv("testdata/HNSCC/check_results.csv") + result_df = result_df.sort_values("directory").reset_index(drop=True) + + # Check results + assert len(result_df["directory"].unique()) == 3 + assert result_df["Check if Planning CT is present"].tolist() == [True, True, True] + assert result_df["Check we have exactly one Planning CT"].tolist() == [ + True, + False, + True, + ] + assert result_df["Check if RT Structure Set is present"].tolist() == [ + True, + True, + True, + ] + assert result_df["Check we have exactly one RT Structure Set"].tolist() == [ + True, + False, + True, + ] + assert result_df["Check expected structures present in structure set"].tolist() == [ + False, + False, + False, + ] + assert result_df["Check if RT Plan is present"].tolist() == [True, True, True] + assert result_df["Check if RT Dose is present"].tolist() == [True, True, True] + + assert result_df["Check if RT Dose DoseSummationType is PLAN"].tolist() == [ + True, + True, + True, + ] + assert result_df["Check all series in same Frame of Reference"].tolist() == [ + True, + False, + True, + ] + assert result_df["Check all series in same Study"].tolist() == [True, False, True] + assert result_df["Check RTSTRUCT and Planning CT are linked"].tolist() == [ + True, + True, + True, + ] + assert result_df["Check RTPLAN and RTSTRUCT are linked"].tolist() == [ + True, + False, + True, + ] + assert result_df["Check RTDOSE and RTPLAN are linked"].tolist() == [ + True, + True, + True, + ]