From 07ddcce19459756e206cbf883dfe34851ae36276 Mon Sep 17 00:00:00 2001 From: Sasha-Barisic Date: Fri, 30 May 2025 18:47:53 +1000 Subject: [PATCH 1/5] pytests and github actions --- .github/workflows/pull-request.yml | 47 ++++++ .gitignore | 1 + README.md | 5 + preprocess.py | 4 +- requirements.txt | 3 +- tests/README.md | 68 +++++++++ tests/test_check.py | 69 +++++++++ tests/test_data_download.py | 55 +++++++ tests/test_match.py | 40 ++++++ tests/test_preprocess.py | 221 +++++++++++++++++++++++++++++ tests/test_run.py | 83 +++++++++++ 11 files changed, 593 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/pull-request.yml create mode 100644 tests/README.md create mode 100644 tests/test_check.py create mode 100644 tests/test_data_download.py create mode 100644 tests/test_match.py create mode 100644 tests/test_preprocess.py create mode 100644 tests/test_run.py diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml new file mode 100644 index 0000000..eae2d2e --- /dev/null +++ b/.github/workflows/pull-request.yml @@ -0,0 +1,47 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +# Pipeline that checks branches that have been pushed to "Main" OR +# are the source branch in a newly created pull request into "Main" +# Fails the test if there are Python syntax errors or undefined names OR pytest fails + +name: dicom-check Pytest Validation + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9"] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run unit tests for `test.py` script + run: | + python -m pytest tests/test_data_download.py + - name: Run unit tests for `preprocess.py` script + run: | + python -m pytest tests/test_preprocess.py + - name: Run unit tests for `match.py` script + run: | + python -m pytest tests/test_match.py + - name: Run unit tests for `check.py` script + run: | + python -m pytest tests/test_check.py + - name: Run unit tests for `run.py` script + run: | + python -m pytest tests/test_run.py diff --git a/.gitignore b/.gitignore index e8fb319..6de38a6 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,4 @@ pyrightconfig.json testdata/ font/ +.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 0734e13..39b8a93 100644 --- a/README.md +++ b/README.md @@ -73,3 +73,8 @@ python run.py -t templates/generic-rt.json -r pdf testdata/HNSCC ``` Check the `testdata/HNSCC/check_results.csv` for a summary of all checks performed. + + +# + +https://github.com/icometrix/dicom2nifti/issues/148 \ No newline at end of file diff --git a/preprocess.py b/preprocess.py index 294ba06..195ee00 100644 --- a/preprocess.py +++ b/preprocess.py @@ -352,14 +352,14 @@ def preprocess( type=str, choices=["pdf", "html"], help="The format of the report to generate (pdf or html). If not provided, " - "no report is generated.", + "no report is generated.", ) parser.add_argument( "-o", "--output_directory", type=Path, help="The path to the directory to save the report to. " - "If not provided, the input directory will be used." + "If not provided, the input directory will be used.", ) args = parser.parse_args() diff --git a/requirements.txt b/requirements.txt index 2ecfd43..96d6399 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ pandas>=2.2.0 pydicom>=2.4.4 tqdm>=4.66.1 fpdf2>=2.8.2 -requests>=2.27.1 \ No newline at end of file +requests>=2.27.1 +networkx==3.4.2 diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..bb25b4b --- /dev/null +++ b/tests/README.md @@ -0,0 +1,68 @@ +# `dicom-check tests` + +This folder contains unit and integration tests that are run as part of GitHub Actions, but can also be run locally in the following order, corresponding to the sequential pipeline steps: +``` +# Step 1: Download test data +python -m pytest tests/test_data_download.py + +# Step 2: Preprocess DICOM files +python -m pytest tests/test_preprocess.py + +# Step 3: Match series to template +python -m pytest tests/test_match.py + +# Step 4: Perform a series of checks +python -m pytest tests/test_check.py + +# Step 5: Run the full workflow on all subdirectories +python -m pytest tests/test_run.py +``` + +## Test summary table +Each table below summarises the test scripts, their assigned IDs, purpose, and the key functions they utilise. + + +### `test_data_download.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | ------- | +| ID001 | `test_download_file_successful` | Tests successful file download | download_file | +| ID002 | `test_download_file_unsuccessful` | Tests hash mismatch handling during download | download_file | +| ID003 | `test_download_test_data` | Tests full test data download and extraction process | download_test_data | + + +### `test_preprocess.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | ------- | +| ID004 | `test_load_template` | Validates loading of the template file and its structure | load_template | +| ID005| `test_index_dicom_file_value_error` | Tests error handling for incorrect input type | index_dicom_files | +| ID006 | `test_index_dicom_file_successful` | Tests correct indexing of DICOM files and DataFrame structure | load_template, index_dicom_files | +| ID009 | `test_generate_series_json_unsuccessful` | Validates detection of data inconsistencies during JSON generation | load_template, scan_file | +| ID010 | `test_generate_series_json_successful` | Tests successful creation of series JSON from scanned DICOM data | load_template, scan_file | +| ID011 | `test_generate_series_report_unsuccessful` | Tests error raised for unsupported report format | generate_series_report | +| ID012 | `test_download_font_pack_successful` | Checks whether font pack downloads and unzips correctly | download_font_pack | +| ID013 | `test_preprocess` | Tests the whole preprocessing step including file generation | download_font_pack | + +**Note:** IDs ID007 and ID008 are not included in the table, until Phil advises me on the scan_file question I have. + +### `test_match.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | ------- | +| ID014 | `test_match_series_to_template` | Validates series matching based on template, and checks match count | match_series_to_template | + + +### `test_check.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | -------- | -------- | +| ID015 | `test_find_matched_series` | Validates matching logic for different checks and expected match counts | find_matched_series | +| ID016 | `test_perform_checks` | Compares number of passed/failed checks with expected outcome | perform_checks | + + +### `test_run.py` + +| Test-ID | Test name | Description | Functions tested | +| -------- | ------- | ------- | -------- | +| ID017 | test_run_on_all_subdirectories | Executes full pipeline on all subdirectories and validates the final summary table | run_on_all_subdirectories | diff --git a/tests/test_check.py b/tests/test_check.py new file mode 100644 index 0000000..08b5ae9 --- /dev/null +++ b/tests/test_check.py @@ -0,0 +1,69 @@ +import json +from pathlib import Path + +import pytest + +from check import perform_checks, find_matched_series + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC/HNSCC-01-0176") + + +@pytest.fixture +def series_file(dicom_path): + return dicom_path / "series.json" + + +@pytest.fixture +def template_dict(): + return Path("templates/generic-rt.json") + + +NUM_PASS = 7 +NUM_FAIL = 6 + +# For each scenario there are a number of expected outputs +cases_per_series = [ + {"name": "Planning CT", "expected_matches": 2}, + {"name": "RT Structure Set", "expected_matches": 2}, + {"name": "RT Plan", "expected_matches": 2}, + {"name": "RT Dose", "expected_matches": 2}, + {"name": ["Planning CT", "RT Structure Set"], "expected_matches": 4}, + {"name": ["RT Structure Set", "RT Plan"], "expected_matches": 4}, + {"name": ["RT Plan", "RT Dose"], "expected_matches": 4}, + { + "name": ["Planning CT", "RT Structure Set", "RT Plan", "RT Dose"], + "expected_matches": 8, + }, +] + + +# ID015 +@pytest.mark.parametrize( + "case", cases_per_series, ids=[str(c["name"]) for c in cases_per_series] +) +def test_find_matched_series(case, series_file): + + with open(series_file, "r", encoding="utf-8") as f: + series_json = json.load(f) + + result = find_matched_series(series_json=series_json, name=case["name"]) + assert len(result) == case["expected_matches"], f"Failed for: {case['name']}" + + +# ID016 +def test_perform_checks(dicom_path, series_file, template_dict): + + perform_checks(directory=dicom_path, template=template_dict, report_format="pdf") + + with open(series_file, "r", encoding="utf-8") as f: + series_json = json.load(f) + + all_checks = series_json["checks"] + passed = [check for check in all_checks if check["passed"]] + failed = [check for check in all_checks if not check["passed"]] + + assert len(passed) == NUM_PASS + assert len(failed) == NUM_FAIL diff --git a/tests/test_data_download.py b/tests/test_data_download.py new file mode 100644 index 0000000..917cbef --- /dev/null +++ b/tests/test_data_download.py @@ -0,0 +1,55 @@ +import os +from pathlib import Path + +import pytest + +from utils import download_file, download_test_data + + +@pytest.fixture +def zip_file_path(): + return Path("./testdata/HNSCC.zip") + + +@pytest.fixture +def data_path(): + return Path("./testdata") + + +# ID001: Test for downloading a file. +def test_download_file_successful(data_path): + data_path.mkdir(parents=True, exist_ok=True) + + zip_path = data_path / "HNSCC.zip" + download_file( + "https://zenodo.org/record/5276878/files/HNSCC.zip", + "6332d59406978a92f57d15da84f2e143", + zip_path, + ) + + assert zip_path.exists() + + +# ID002: Test for hash mismatch +def test_download_file_unsuccessful(data_path): + with pytest.raises(ValueError) as exc_info: + download_file( + "https://zenodo.org/record/5276878/files/HNSCC.zip", + "6332d59406978a88f57d15da84f2e143", + data_path.joinpath("HNSCC.zip"), + ) + assert "Hash mismatch" in str(exc_info.value) + + +# ID003: Download test data - full process +def test_download_test_data(zip_file_path, data_path): + download_test_data(data_path) + + # Assert ZIP file was deleted + assert not zip_file_path.exists() + + # Assert expected directories exist + extracted_path = data_path / "HNSCC" + + for name in ["HNSCC-01-0019", "HNSCC-01-0176", "HNSCC-01-0199"]: + assert extracted_path.joinpath(name).is_dir() diff --git a/tests/test_match.py b/tests/test_match.py new file mode 100644 index 0000000..8d9256b --- /dev/null +++ b/tests/test_match.py @@ -0,0 +1,40 @@ +import json +from pathlib import Path + +import pytest + +from match import match_series_to_template + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC/HNSCC-01-0176") + + +@pytest.fixture +def series_file(dicom_path): + return dicom_path / "series.json" + + +@pytest.fixture +def template_dict(): + return Path("templates/generic-rt.json") + + +EXPECTED_MATCHES = 8 + + +# ID014 +def test_match_series_to_template(dicom_path, series_file, template_dict): + + match_series_to_template( + directory=dicom_path, template=template_dict, report_format="pdf" + ) + + # Count the number of matches + with open(series_file, "r", encoding="utf-8") as f: + series_json = json.load(f) + + matches = [elem for elem in series_json["series"] if "match" in elem.keys()] + + assert len(matches) == EXPECTED_MATCHES diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py new file mode 100644 index 0000000..a06d3ee --- /dev/null +++ b/tests/test_preprocess.py @@ -0,0 +1,221 @@ +from pathlib import Path + +import pandas as pd +import pytest + +from preprocess import ( + generate_series_json, + generate_series_report, + index_dicom_files, + load_template, + preprocess, + scan_file, +) + +from utils import download_font_pack + + +@pytest.fixture +def expected_template_keys(): + return ["project", "description", "version", "meta", "expected_series", "checks"] + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC/HNSCC-01-0176") + + +@pytest.fixture +def dicom_file(dicom_path): + return dicom_path / "03-01-2009-NA-NA-44207" / "10.000000-NA-34598" / "135-001.dcm" + + +@pytest.fixture +def template_file(): + return Path("templates/generic-rt.json") + + +expected_df_cols = [ + "patient_id", + "study_uid", + "series_uid", + "modality", + "sop_class_uid", + "sop_instance_uid", + "for_uid", + "file_path", + "date_time", + "slice_location", + "referenced_uid", + "referenced_for_uid", + "SeriesDescription", + "StudyDescription", + "StudyInstanceUID", + "DoseSummationType", +] + + +# ID004 +def test_load_template(template_file, expected_template_keys): + + template_dict = load_template(template_file) + assert expected_template_keys == list(template_dict.keys()) + + +# ID005 +def test_index_dicom_file_value_error(): + + # Provide incorrect argument type + with pytest.raises(ValueError) as exc_info: + index_dicom_files(1) + + assert "input_directory must be of type pathlib.Path or str" in str(exc_info.value) + + +# ID006 +def test_index_dicom_file_successful(template_file, dicom_path): + + # Load the files from example DICOM dir. + template = load_template(template_file) + meta = template.get("meta", []) + dicom_df = index_dicom_files(dicom_path, meta=meta) + + assert dicom_df.shape == (998, 16) + assert list(dicom_df.columns) == expected_df_cols + + +# # ID007 - Not in the README +# def test_scan_file_unsuccessful(template_file): + +# # Provide a non-DICOM file +# template = load_template(template_file) +# meta = template.get("meta", []) +# result, status, _ = scan_file(template, meta=meta) + +# assert result is None +# assert status == "error" + +# # Provide a random DICOM file +# f = r"testdata/HNSCC/HNSCC-01-0019/07-04-1998-NA-RT\ SIMULATION-48452/1.000000-NA-10361/1-1.dcm" +# result, status, _ = scan_file(f, meta=meta) + +# assert result is None +# assert status == "error" + + +# # ID008 - Not in the README +# def test_scan_file_successful(template_file, dicom_file): + +# # Load in the meta tags +# template = load_template(template_file) +# meta = template.get("meta", []) + +# # Provide a DICOM file that can be indexed +# result, status, _ = scan_file(dicom_file, meta=meta) + +# assert result is not None +# assert status == "ok" +# assert isinstance(result, dict) +# assert result["patient_id"] == "HNSCC-01-0176" + + +# ID009 +def test_generate_series_json_unsuccessful(template_file, dicom_file): + + # Load in the meta tags + template = load_template(template_file) + meta = template.get("meta", []) + + # Provide a DICOM file that can be indexed + dicom_dict1, _, _ = scan_file(dicom_file, meta=meta) + dicom_dict2, _, _ = scan_file(dicom_file, meta=meta) + + assert dicom_dict1 is not None + assert dicom_dict2 is not None + + dicom_dict2_copy = dicom_dict2 + + # Go through the 'cases' that will raise a ValueError + cases = { + "modality": " has multiple modalities", + "for_uid": " has multiple frame of references", + "referenced_uid": " has multiple referenced series", + } + + for case, message in cases.items(): + + dicom_dict2_copy[case] = "something_random" + + dicom_df = pd.DataFrame( + [dicom_dict1, dicom_dict2_copy], columns=expected_df_cols + ) + + with pytest.raises(ValueError) as exc_info: + generate_series_json(df=dicom_df, meta=meta) + + assert f"Series {list(dicom_df['series_id'])[0]}" + message in str( + exc_info.value + ) + + +# ID010 +def test_generate_series_json_successful(template_file, dicom_file): + + # Load in the meta tags + template = load_template(template_file) + meta = template.get("meta", []) + + # Provide a DICOM file that can be indexed + dicom_dict1, _, _ = scan_file(dicom_file, meta=meta) + dicom_dict2, _, _ = scan_file(dicom_file, meta=meta) + + assert dicom_dict1 is not None + assert dicom_dict2 is not None + + dicom_df = pd.DataFrame([dicom_dict1, dicom_dict2], columns=expected_df_cols) + series_json = generate_series_json(df=dicom_df, meta=meta) + + assert isinstance(series_json, dict) + + +# ID011 +def test_generate_series_report_unsuccessful(): + + # Provide invalid report format + incorrect_report_format = "py" + with pytest.raises(ValueError) as exc_info: + generate_series_report( + series_json={}, output_directory=".", report_format=incorrect_report_format + ) + + assert f"Unsupported format {incorrect_report_format}" in str(exc_info.value) + + +# ID012 +def test_download_font_pack_successful(): + + download_font_pack(Path(".")) + + zip_file_path = Path("./font_pack.zip") + + # Assert ZIP file was deleted + assert not zip_file_path.exists() + + +# ID013 +def test_preprocess(dicom_path, template_file): + + # Run the whole preprocess task + preprocess(input_directory=dicom_path, template=template_file, report_format="pdf") + + # Check that the indexed.csv is created + indexed_file = dicom_path.joinpath("indexed.csv") + assert indexed_file.exists() + + # Check that the series.json is created + series_file = dicom_path.joinpath("series.json") + assert series_file.exists() + + # Check that the pdf report is present + series_report = dicom_path.joinpath("series_report.pdf") + assert series_report.exists() diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..f12afdf --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,83 @@ +from pathlib import Path + +import pandas as pd +import pytest + +from run import run_on_all_subdirectories + + +@pytest.fixture +def dicom_path(): + return Path("testdata/HNSCC") + + +@pytest.fixture +def template_dict(): + return "templates/generic-rt.json" + + +def test_run_on_all_subdirectories( + dicom_path, + template_dict, +): + + # Run the processing on all subdirectories + run_on_all_subdirectories( + directory=dicom_path, template=template_dict, report_format="pdf" + ) + + result_df = pd.read_csv("testdata/HNSCC/check_results.csv") + + # Check results + assert len(result_df["directory"].unique()) == 3 + assert result_df["Check if Planning CT is present"].tolist() == [True, True, True] + + assert result_df["Check we have exactly one Planning CT"].tolist() == [ + True, + True, + False, + ] + assert result_df["Check if RT Structure Set is present"].tolist() == [ + True, + True, + True, + ] + assert result_df["Check we have exactly one RT Structure Set"].tolist() == [ + True, + True, + False, + ] + assert result_df["Check expected structures present in structure set"].tolist() == [ + False, + False, + False, + ] + assert result_df["Check if RT Plan is present"].tolist() == [True, True, True] + assert result_df["Check if RT Dose is present"].tolist() == [True, True, True] + + assert result_df["Check if RT Dose DoseSummationType is PLAN"].tolist() == [ + True, + True, + True, + ] + assert result_df["Check all series in same Frame of Reference"].tolist() == [ + True, + True, + False, + ] + assert result_df["Check all series in same Study"].tolist() == [True, True, False] + assert result_df["Check RTSTRUCT and Planning CT are linked"].tolist() == [ + True, + True, + True, + ] + assert result_df["Check RTPLAN and RTSTRUCT are linked"].tolist() == [ + True, + True, + False, + ] + assert result_df["Check RTDOSE and RTPLAN are linked"].tolist() == [ + True, + True, + True, + ] From e6e056db4b473eaba09443bbc69d172e6ad6ccde Mon Sep 17 00:00:00 2001 From: Sasha-Barisic Date: Fri, 30 May 2025 19:19:04 +1000 Subject: [PATCH 2/5] version of networkx --- .github/workflows/pull-request.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index eae2d2e..f3b3691 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9"] + python-version: ["3.9"] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} diff --git a/requirements.txt b/requirements.txt index 96d6399..2732929 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ pydicom>=2.4.4 tqdm>=4.66.1 fpdf2>=2.8.2 requests>=2.27.1 -networkx==3.4.2 +networkx==3.0 From d7c362c6e924ca382e6805c0fd9996244b4e08e9 Mon Sep 17 00:00:00 2001 From: Sasha-Barisic Date: Fri, 30 May 2025 19:20:56 +1000 Subject: [PATCH 3/5] pytest module --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2732929..ba1b561 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ tqdm>=4.66.1 fpdf2>=2.8.2 requests>=2.27.1 networkx==3.0 +pytest From a89eeaf3dbc927ee015b3065cadb7ed723e6761c Mon Sep 17 00:00:00 2001 From: Sasha-Barisic Date: Fri, 30 May 2025 19:26:48 +1000 Subject: [PATCH 4/5] indentation --- tests/test_preprocess.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index a06d3ee..ad07f44 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -153,9 +153,9 @@ def test_generate_series_json_unsuccessful(template_file, dicom_file): with pytest.raises(ValueError) as exc_info: generate_series_json(df=dicom_df, meta=meta) - assert f"Series {list(dicom_df['series_id'])[0]}" + message in str( - exc_info.value - ) + assert f"Series {list(dicom_df['series_id'])[0]}" + message in str( + exc_info.value + ) # ID010 From 877fd554bbab1c76ffecaf12004a11391b5c3301 Mon Sep 17 00:00:00 2001 From: Sasha-Barisic Date: Fri, 30 May 2025 20:07:36 +1000 Subject: [PATCH 5/5] df order --- tests/test_run.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_run.py b/tests/test_run.py index f12afdf..b5df07f 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -27,15 +27,15 @@ def test_run_on_all_subdirectories( ) result_df = pd.read_csv("testdata/HNSCC/check_results.csv") + result_df = result_df.sort_values("directory").reset_index(drop=True) # Check results assert len(result_df["directory"].unique()) == 3 assert result_df["Check if Planning CT is present"].tolist() == [True, True, True] - assert result_df["Check we have exactly one Planning CT"].tolist() == [ - True, True, False, + True, ] assert result_df["Check if RT Structure Set is present"].tolist() == [ True, @@ -43,9 +43,9 @@ def test_run_on_all_subdirectories( True, ] assert result_df["Check we have exactly one RT Structure Set"].tolist() == [ - True, True, False, + True, ] assert result_df["Check expected structures present in structure set"].tolist() == [ False, @@ -61,20 +61,20 @@ def test_run_on_all_subdirectories( True, ] assert result_df["Check all series in same Frame of Reference"].tolist() == [ - True, True, False, + True, ] - assert result_df["Check all series in same Study"].tolist() == [True, True, False] + assert result_df["Check all series in same Study"].tolist() == [True, False, True] assert result_df["Check RTSTRUCT and Planning CT are linked"].tolist() == [ True, True, True, ] assert result_df["Check RTPLAN and RTSTRUCT are linked"].tolist() == [ - True, True, False, + True, ] assert result_df["Check RTDOSE and RTPLAN are linked"].tolist() == [ True,