Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified 2021-clb-oropharynx/figures/age_and_sex.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2021-clb-oropharynx/figures/bar_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2021-clb-oropharynx/figures/subsite.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2021-clb-oropharynx/figures/t_category.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 7 additions & 17 deletions 2021-clb-oropharynx/mapping.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""
Map the `raw.csv` data from the 2021-clb-oropharynx cohort to the `data.csv` file.
"""Map the `raw.csv` data from the 2021-clb-oropharynx cohort to the `data.csv` file.

This module defines how the command `lyscripts data lyproxify` (see
[here](https://rmnldwg.github.io/lyscripts) for the documentation of the
`lyscripts` module)
Expand Down Expand Up @@ -56,6 +55,7 @@

---
"""

import re

import icd10
Expand All @@ -64,9 +64,7 @@


def robust_date(entry, *_args, **_kwargs):
"""
Robustly parse a date string.
"""
"""Robustly parse a date string."""
try:
parsed_dt = parse(entry)
return parsed_dt.strftime("%Y-%m-%d")
Expand All @@ -77,9 +75,7 @@ def robust_date(entry, *_args, **_kwargs):


def robust_int(entry, *_args, **_kwargs):
"""
Robustly convert a string to int, if possible.
"""
"""Robustly convert a string to int, if possible."""
try:
return int(entry)
except ValueError:
Expand All @@ -89,9 +85,7 @@ def robust_int(entry, *_args, **_kwargs):


def get_subsite(entry, *_args, **_kwargs):
"""
Get human-readable subsite from ICD-10 code.
"""
"""Get human-readable subsite from ICD-10 code."""
match = re.search("(C[0-9]{2})(.[0-9]{1})?", entry)
if match:
for i in [0, 1]:
Expand All @@ -103,18 +97,14 @@ def get_subsite(entry, *_args, **_kwargs):


def parse_pathology(entry, *_args, **_kwargs):
    """Transform number of positive nodes to `True`, `False` or `None`.

    A missing count (`None` or NaN) yields `None`, a count of zero yields `False`,
    and any other count yields `True`. Extra positional and keyword arguments are
    accepted and ignored.
    """
    # `np.isnan` raises a `TypeError` for `None`, so check for it explicitly first.
    if entry is None or np.isnan(entry):
        return None
    # `bool(...)` keeps the return value a plain Python bool for numpy scalars too.
    return bool(entry != 0)


def strip_letters(entry, *_args, **_kwargs):
"""
Remove letters following a number.
"""
"""Remove letters following a number."""
try:
return int(entry)
except ValueError:
Expand Down
Binary file modified 2021-usz-oropharynx/figures/age_and_sex.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2021-usz-oropharynx/figures/subsite.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2021-usz-oropharynx/figures/t_category.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
135 changes: 56 additions & 79 deletions 2023-clb-multisite/README.md

Large diffs are not rendered by default.

752 changes: 376 additions & 376 deletions 2023-clb-multisite/data.csv

Large diffs are not rendered by default.

Binary file modified 2023-clb-multisite/figures/age_and_sex.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2023-clb-multisite/figures/bar_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2023-clb-multisite/figures/subsite.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2023-clb-multisite/figures/t_category.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
62 changes: 22 additions & 40 deletions 2023-clb-multisite/mapping.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""
Map the `raw.csv` data from the 2023-clb-multisite cohort to the `data.csv` file.
"""Map the `raw.csv` data from the 2023-clb-multisite cohort to the `data.csv` file.

This module defines how the command `lyscripts data lyproxify` (see
[here](https://rmnldwg.github.io/lyscripts) for the documentation of the
`lyscripts` module)
Expand Down Expand Up @@ -56,6 +55,7 @@

---
"""

import re
from collections.abc import Callable
from typing import Any
Expand Down Expand Up @@ -122,27 +122,24 @@ def smpl_diagnose(entry: str | int, *_args, **_kwargs) -> bool:


def robust(func: Callable) -> Any | None:
"""
Wrapper that makes any type-conversion function 'robust' by simply returning
`None` whenever any exception is thrown.
"""Return 'robust' type-conversion function.

Do so by simply returning `None` whenever any exception is thrown.
"""

# pylint: disable=bare-except
def wrapper(entry, *_args, **_kwargs):
if pd.isna(entry):
return None
try:
return func(entry)
except:
except: # noqa: E722
return None

return wrapper


def get_subsite(entry: str, *_args, **_kwargs) -> str | None:
"""
Get human-readable subsite from ICD-10 code.
"""
"""Get human-readable subsite from ICD-10 code."""
match = re.search("(C[0-9]{2})(.[0-9]{1})?", entry)
if match:
for i in [0, 1]:
Expand All @@ -154,48 +151,41 @@ def get_subsite(entry: str, *_args, **_kwargs) -> str | None:


def parse_pathology(entry, *_args, **_kwargs) -> bool | None:
"""
Transform number of positive nodes to `True`, `False` or `None`.
"""
"""Transform number of positive nodes to `True`, `False` or `None`."""
if np.isnan(entry):
return None
return False if entry == 0 else True


def set_diagnostic_consensus(entry, *_args, **_kwargs):
    """Return `False` (i.e. 'healthy') when nothing about a resected LNL is available.

    This is a hack to tackle the issue described here:
    https://github.com/rmnldwg/lyprox/issues/92

    A NaN `entry` yields `False`; any other numeric value yields `None`. Extra
    positional and keyword arguments are accepted and ignored.
    """
    if np.isnan(entry):
        return False
    return None


def extract_hpv(value: int | None, *_args, **_kwargs) -> bool | None:
"""
Translate the HPV value to a boolean.
"""
"""Translate the HPV value to a boolean."""
if value == 0:
return False
elif value == 1:
if value == 1:
return True
return None


def strip_letters(entry: str, *_args, **_kwargs) -> int:
    """Remove letters following a number.

    The entry is first converted with `int` directly. If that fails, the leading
    run of digits is extracted and converted instead, so that e.g. `"2a"` becomes
    `2` and `"12b"` becomes `12`. Extra positional and keyword arguments are
    accepted and ignored.

    Raises:
        ValueError: If the entry does not start with a digit.
    """
    try:
        return int(entry)
    except ValueError:
        # Take the whole leading number, not just its first character, so that
        # multi-digit values are not truncated.
        leading_digits = re.match(r"\s*(\d+)", entry)
        if leading_digits is None:
            raise
        return int(leading_digits.group(1))


def clean_cat(cat: str) -> int:
"""
Extract T or N category as integer from the respective string.
"""Extract T or N category as integer from the respective string.

I.e., turn 'pN2+' into 2.
"""
pattern = re.compile(r"[cp][TN]([0-4])[\s\S]*")
Expand All @@ -207,10 +197,7 @@ def clean_cat(cat: str) -> int:


def get_tnm_info(ct7, cn7, pt7, pn7, ct8, cn8, pt8, pn8) -> tuple[int, int, int, str]:
"""
Determine the TNM edition used based on which versions are available for T and/or
N category.
"""
"""Determine the TNM edition used."""
ct7 = clean_cat(ct7)
cn7 = clean_cat(cn7)
pt7 = clean_cat(pt7)
Expand Down Expand Up @@ -265,8 +252,7 @@ def get_tnm_prefix(*args, **_kwargs) -> str:


def check_excluded(column: pd.Series) -> pd.Index:
"""
Check if a patient/row is excluded based on the content of a `column`.
"""Check if a patient/row is excluded based on the content of a `column`.

For the 2022 CLB multisite dataset this is the case when the first column with the
three-level header `("Bauwens", "Database", "0_lvl_2")` is not empty or does not
Expand All @@ -279,9 +265,7 @@ def check_excluded(column: pd.Series) -> pd.Index:


def sum_columns(*columns, **_kwargs) -> int:
"""
Sum the values of multiple columns.
"""
"""Sum the values of multiple columns."""
res = 0
for column in columns:
add = robust(int)(column)
Expand All @@ -306,11 +290,6 @@ def sum_columns(*columns, **_kwargs) -> int:
"The second level header for the `patient` columns is only a "
"placeholder."
),
"id": {
"__doc__": "The patient ID.",
"func": str,
"columns": [("patient", "#", "id")],
},
"institution": {
"__doc__": "The institution where the patient was treated.",
"default": "Centre Léon Bérard",
Expand Down Expand Up @@ -396,7 +375,10 @@ def sum_columns(*columns, **_kwargs) -> int:
"1": {
"__doc__": "The second level header enumerates synchronous tumors.",
"location": {
"__doc__": "The location of the tumor. This is empty for all patients because we can later infer it from the subsite's ICD-O-3 code.",
"__doc__": (
"The location of the tumor. This is empty for all patients because "
"we can later infer it from the subsite's ICD-O-3 code."
),
"default": None,
},
"subsite": {
Expand Down
Binary file modified 2023-isb-multisite/figures/age_and_sex.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2023-isb-multisite/figures/bar_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2023-isb-multisite/figures/subsite.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 2023-isb-multisite/figures/t_category.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@

All notable changes to this project will be documented in this file.

## [0.2.5] - 2025-02-05

### 🐛 Bug Fixes

- `get_repo()` did not return repo
- Respect "method" kwarg in combining mods

### 🧪 Testing

- Run `dvc repro` to check new lyscripts

### ⚙️ Miscellaneous Tasks

- Bump requirements

### Change

- Slightly improve logging

## [0.2.4] - 2025-01-15

### 📚 Documentation
Expand Down Expand Up @@ -234,6 +253,7 @@ Initial implementation of the lyDATA library.
<!-- generated by git-cliff -->
<!-- markdownlint-disable-file MD024 -->

[0.2.5]: https://github.com/rmnldwg/lydata/compare/0.2.4..0.2.5
[0.2.4]: https://github.com/rmnldwg/lydata/compare/0.2.3..0.2.4
[0.2.3]: https://github.com/rmnldwg/lydata/compare/0.2.2..0.2.3
[0.2.2]: https://github.com/rmnldwg/lydata/compare/0.2.1..0.2.2
Expand Down
Loading