ingham-physics · Sasha-Barisic · May 30, 2025 · May 30, 2025 · May 30, 2025 · May 30, 2025
diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml
@@ -0,0 +1,47 @@
+# This workflow installs the Python dependencies and runs the pytest suite on each Python version listed in the build matrix
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+# Pipeline that runs on every push to `main` and on every pull request whose
+# target branch is `main`.
+# Fails if any pytest step fails; there is no separate lint or syntax-check step.
+
+name: dicom-check Pytest Validation
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9"]
+    steps:  # test steps run in pipeline order; tests/README.md describes them as sequential
+      - uses: actions/checkout@v4  # v2 targets a Node runtime GitHub has deprecated
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5  # v2 targets a Node runtime GitHub has deprecated
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Run unit tests for data download helpers in `utils.py`
+        run: |
+          python -m pytest tests/test_data_download.py
+      - name: Run unit tests for `preprocess.py` script
+        run: |
+          python -m pytest tests/test_preprocess.py
+      - name: Run unit tests for `match.py` script
+        run: |
+          python -m pytest tests/test_match.py
+      - name: Run unit tests for `check.py` script
+        run: |
+          python -m pytest tests/test_check.py
+      - name: Run unit tests for `run.py` script
+        run: |
+          python -m pytest tests/test_run.py
diff --git a/.gitignore b/.gitignore
@@ -177,3 +177,4 @@ pyrightconfig.json
 
 testdata/
 font/
+.DS_Store
diff --git a/README.md b/README.md
@@ -73,3 +73,8 @@ python run.py -t templates/generic-rt.json -r pdf testdata/HNSCC
 ```
 
 Check the `testdata/HNSCC/check_results.csv` for a summary of all checks performed.
+
+
+# Related issues
+
+https://github.com/icometrix/dicom2nifti/issues/148
diff --git a/preprocess.py b/preprocess.py
@@ -352,14 +352,14 @@ def preprocess(
         type=str,
         choices=["pdf", "html"],
         help="The format of the report to generate (pdf or html). If not provided, "
-            "no report is generated.",
+        "no report is generated.",
     )
     parser.add_argument(
         "-o",
         "--output_directory",
         type=Path,
         help="The path to the directory to save the report to. "
-        "If not provided, the input directory will be used."
+        "If not provided, the input directory will be used.",
     )
 
     args = parser.parse_args()

diff --git a/requirements.txt b/requirements.txt
@@ -2,4 +2,6 @@ pandas>=2.2.0
 pydicom>=2.4.4
 tqdm>=4.66.1
 fpdf2>=2.8.2
-requests>=2.27.1
+requests>=2.27.1
+networkx==3.0
+pytest
diff --git a/tests/README.md b/tests/README.md
@@ -0,0 +1,68 @@
+# `dicom-check tests`
+
+This folder contains unit and integration tests that are run as part of GitHub Actions, but can also be run locally in the following order, corresponding to the sequential pipeline steps:
+```
+# Step 1: Download test data
+python -m pytest tests/test_data_download.py
+
+# Step 2: Preprocess DICOM files
+python -m pytest tests/test_preprocess.py
+
+# Step 3: Match series to template
+python -m pytest tests/test_match.py
+
+# Step 4: Perform a series of checks
+python -m pytest tests/test_check.py
+
+# Step 5: Run the full workflow on all subdirectories
+python -m pytest tests/test_run.py
+```
+
+## Test summary tables
+Each table below summarises one test script: the assigned test IDs, the test names, their purpose, and the key functions they exercise.
+
+
+### `test_data_download.py`
+
+| Test-ID | Test name | Description | Functions tested |
+| -------- | ------- | -------- | ------- |
+| ID001 | `test_download_file_successful` | Tests successful file download | download_file |
+| ID002 | `test_download_file_unsuccessful` | Tests hash mismatch handling during download | download_file |
+| ID003 | `test_download_test_data` | Tests full test data download and extraction process | download_test_data |
+
+
+### `test_preprocess.py`
+
+| Test-ID | Test name | Description | Functions tested |
+| -------- | ------- | -------- | ------- |
+| ID004 | `test_load_template` | Validates loading of the template file and its structure | load_template |
+| ID005 | `test_index_dicom_file_value_error` | Tests error handling for incorrect input type | index_dicom_files |
+| ID006 | `test_index_dicom_file_successful` | Tests correct indexing of DICOM files and DataFrame structure | load_template, index_dicom_files |
+| ID009 | `test_generate_series_json_unsuccessful` | Validates detection of data inconsistencies during JSON generation | load_template, scan_file |
+| ID010 | `test_generate_series_json_successful` | Tests successful creation of series JSON from scanned DICOM data | load_template, scan_file |
+| ID011 | `test_generate_series_report_unsuccessful` | Tests error raised for unsupported report format | generate_series_report |
+| ID012 | `test_download_font_pack_successful` | Checks whether font pack downloads and unzips correctly | download_font_pack |
+| ID013 | `test_preprocess` | Tests the whole preprocessing step including file generation | preprocess |
+
+**Note:** ID007 and ID008 are intentionally omitted from the table; they are reserved pending an open question about `scan_file` (awaiting advice from Phil).
+
+### `test_match.py`
+
+| Test-ID | Test name | Description | Functions tested |
+| -------- | ------- | -------- | -------  |
+| ID014 | `test_match_series_to_template` | Validates series matching based on template, and checks match count | match_series_to_template |
+
+
+### `test_check.py`
+
+| Test-ID | Test name |  Description | Functions tested |
+| -------- | ------- | -------- | -------- |
+| ID015 | `test_find_matched_series` | Validates matching logic for different checks and expected match counts | find_matched_series |
+| ID016 | `test_perform_checks` | Compares number of passed/failed checks with expected outcome | perform_checks |
+
+
+### `test_run.py`
+
+| Test-ID | Test name | Description | Functions tested |
+| -------- | ------- |  ------- | -------- |
+| ID017 | `test_run_on_all_subdirectories` | Executes full pipeline on all subdirectories and validates the final summary table | run_on_all_subdirectories |
diff --git a/tests/test_check.py b/tests/test_check.py
@@ -0,0 +1,69 @@
+import json
+from pathlib import Path
+
+import pytest
+
+from check import perform_checks, find_matched_series
+
+
+@pytest.fixture
+def dicom_path():  # test-data subdirectory that the tests in this module operate on
+    return Path("testdata/HNSCC/HNSCC-01-0176")
+
+
+@pytest.fixture
+def series_file(dicom_path):  # path of the series.json file inside dicom_path
+    return dicom_path / "series.json"
+
+
+@pytest.fixture
+def template_dict():  # NOTE: despite the name, this returns the template *path*, not a dict
+    return Path("templates/generic-rt.json")
+
+
+NUM_PASS = 7  # expected number of "checks" entries with a truthy "passed" value
+NUM_FAIL = 6  # expected number of "checks" entries with a falsy "passed" value
+
+# Each case: the `name` passed to find_matched_series and the expected length of its result
+cases_per_series = [
+    {"name": "Planning CT", "expected_matches": 2},
+    {"name": "RT Structure Set", "expected_matches": 2},
+    {"name": "RT Plan", "expected_matches": 2},
+    {"name": "RT Dose", "expected_matches": 2},
+    {"name": ["Planning CT", "RT Structure Set"], "expected_matches": 4},
+    {"name": ["RT Structure Set", "RT Plan"], "expected_matches": 4},
+    {"name": ["RT Plan", "RT Dose"], "expected_matches": 4},
+    {
+        "name": ["Planning CT", "RT Structure Set", "RT Plan", "RT Dose"],
+        "expected_matches": 8,
+    },
+]
+
+
+# ID015
+@pytest.mark.parametrize(
+    "case", cases_per_series, ids=[str(c["name"]) for c in cases_per_series]
+)
+def test_find_matched_series(case, series_file):
+    """Check that find_matched_series returns the expected number of entries per case."""
+    series_json = json.loads(series_file.read_text(encoding="utf-8"))
+    wanted = case["name"]
+
+    found = find_matched_series(series_json=series_json, name=wanted)
+    assert len(found) == case["expected_matches"], f"Failed for: {case['name']}"
+
+
+# ID016
+def test_perform_checks(dicom_path, series_file, template_dict):
+    """Run perform_checks, then compare the pass/fail counts found in series.json."""
+    perform_checks(directory=dicom_path, template=template_dict, report_format="pdf")
+
+    # Outcomes are read back from the "checks" list in series.json.
+    series_json = json.loads(series_file.read_text(encoding="utf-8"))
+    outcomes = [bool(entry["passed"]) for entry in series_json["checks"]]
+
+    num_passed = sum(outcomes)
+    num_failed = len(outcomes) - num_passed
+
+    assert num_passed == NUM_PASS
+    assert num_failed == NUM_FAIL
diff --git a/tests/test_data_download.py b/tests/test_data_download.py
@@ -0,0 +1,55 @@
+import os
+from pathlib import Path
+
+import pytest
+
+from utils import download_file, download_test_data
+
+
+@pytest.fixture
+def zip_file_path():  # location of the HNSCC.zip archive inside the test-data directory
+    return Path("./testdata/HNSCC.zip")
+
+
+@pytest.fixture
+def data_path():  # root directory the download tests write into
+    return Path("./testdata")
+
+
+# ID001: Test for downloading a file.
+def test_download_file_successful(data_path):
+    """Download HNSCC.zip into data_path and check that the file exists afterwards."""
+    url = "https://zenodo.org/record/5276878/files/HNSCC.zip"
+    expected_hash = "6332d59406978a92f57d15da84f2e143"
+    target = data_path / "HNSCC.zip"
+
+    # The destination directory is created on demand.
+    data_path.mkdir(parents=True, exist_ok=True)
+    download_file(url, expected_hash, target)
+
+    assert target.exists()
+
+
+# ID002: Test for hash mismatch
+def test_download_file_unsuccessful(data_path):
+    """Expect a ValueError mentioning 'Hash mismatch' when the supplied hash is wrong."""
+    data_path.mkdir(parents=True, exist_ok=True)  # runnable without ID001 having run first
+    url = "https://zenodo.org/record/5276878/files/HNSCC.zip"
+    wrong_hash = "6332d59406978a88f57d15da84f2e143"  # ID001's hash with "92" changed to "88"
+    with pytest.raises(ValueError) as exc_info:
+        download_file(url, wrong_hash, data_path.joinpath("HNSCC.zip"))
+    assert "Hash mismatch" in str(exc_info.value)
+
+
+# ID003: Download test data - full process
+def test_download_test_data(zip_file_path, data_path):
+    """Run download_test_data, then check the ZIP is gone and the folders exist."""
+    download_test_data(data_path)
+
+    # The ZIP archive must not be present afterwards.
+    assert not zip_file_path.exists()
+
+    # Every expected subdirectory must be present under HNSCC/.
+    hnscc_root = data_path / "HNSCC"
+    for subdir in ("HNSCC-01-0019", "HNSCC-01-0176", "HNSCC-01-0199"):
+        assert (hnscc_root / subdir).is_dir()
diff --git a/tests/test_match.py b/tests/test_match.py
@@ -0,0 +1,40 @@
+import json
+from pathlib import Path
+
+import pytest
+
+from match import match_series_to_template
+
+
+@pytest.fixture
+def dicom_path():  # test-data subdirectory that the test in this module operates on
+    return Path("testdata/HNSCC/HNSCC-01-0176")
+
+
+@pytest.fixture
+def series_file(dicom_path):  # path of the series.json file inside dicom_path
+    return dicom_path / "series.json"
+
+
+@pytest.fixture
+def template_dict():  # NOTE: despite the name, this returns the template *path*, not a dict
+    return Path("templates/generic-rt.json")
+
+
+EXPECTED_MATCHES = 8  # expected number of "series" entries that carry a "match" key
+
+
+# ID014
+def test_match_series_to_template(dicom_path, series_file, template_dict):
+    """Run match_series_to_template, then count the matched entries in series.json."""
+    match_series_to_template(
+        directory=dicom_path, template=template_dict, report_format="pdf"
+    )
+
+    # Read the series index back from disk.
+    series_json = json.loads(series_file.read_text(encoding="utf-8"))
+
+    # A series entry counts as matched when it carries a "match" key.
+    num_matched = sum(1 for entry in series_json["series"] if "match" in entry)
+
+    assert num_matched == EXPECTED_MATCHES
Original file line number	Diff line number	Diff line change
Expand Up		@@ -177,3 +177,4 @@ pyrightconfig.json

		testdata/
		font/
		.DS_Store
-Original file line number
+Diff line change
@@ Expand Up @@
     ```
     Check the `testdata/HNSCC/check_results.csv` for a summary of all checks performed.
+    #
+    https://github.com/icometrix/dicom2nifti/issues/148