Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions sdk/atriumdb/adb_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,9 +784,14 @@ def merge_sorted_messages(message_starts_1, message_sizes_1, values_1,
combined_timestamps = np.concatenate((timestamps_1, timestamps_2))
combined_values = np.concatenate((values_1, values_2))

sorted_indices = np.argsort(combined_timestamps)
sorted_timestamps = combined_timestamps[sorted_indices]
sorted_values = combined_values[sorted_indices]
# Create array indicators to ensure stable sorting where array 2 overwrites array 1
array_indicators = np.concatenate((np.zeros(len(timestamps_1), dtype=int),
np.ones(len(timestamps_2), dtype=int)))

# Sort by timestamp first, then by array indicator (so array 2 comes after array 1 for ties)
sort_keys = np.lexsort((array_indicators, combined_timestamps))
sorted_timestamps = combined_timestamps[sort_keys]
sorted_values = combined_values[sort_keys]

# Convert the sorted timestamps into "sample times"
sample_times = sorted_timestamps / period_ns
Expand Down
7 changes: 2 additions & 5 deletions sdk/atriumdb/atrium_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -4438,15 +4438,12 @@ def get_label_time_series(self, label_name=None, label_name_id=None, device_tag=

# Create a binary array to indicate presence of a label for each timestamp, if not provided.
if out is not None:
allowed_dtypes = [np.bool_] + np.sctypes['int'] # Allowed dtypes: boolean and all integer types

if out.shape != timestamp_array.shape:
raise ValueError(
f"The 'out' array shape {out.shape} doesn't match expected shape {timestamp_array.shape}.")

if out.dtype not in allowed_dtypes:
valid_dtypes_str = ", ".join([dtype.__name__ for dtype in allowed_dtypes])
raise ValueError(f"The 'out' array dtype is {out.dtype}, but expected one of: {valid_dtypes_str}.")
if out.dtype.kind not in ('b', 'i', 'u'): # boolean, signed int, unsigned int
raise ValueError(f"The 'out' array dtype is {out.dtype}, but expected boolean or integer type.")

if not np.all(out == 0): # Ensure that the out array starts with all zeros
raise ValueError("The 'out' array should be initialized with zeros. It contains non-zero values.")
Expand Down
5 changes: 3 additions & 2 deletions sdk/atriumdb/windowing/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,9 @@ def _check_times_and_warn(self, times, source_type, source_id):
raise ValueError(f"{source_type} {source_id}: {key} cannot be negative")

if value < 1e9 or (value < 1e16 and key in ['start', 'end', 'time0']):
warnings.warn(f"{source_type} {source_id}: The epoch for {key}: {value} looks like it's "
f"formatted in seconds. However {key} will be interpreted as nanosecond data.")
# warnings.warn(f"{source_type} {source_id}: The epoch for {key}: {value} looks like it's "
# f"formatted in seconds. However {key} will be interpreted as nanosecond data.")
pass

if ('pre' in time_dict or 'post' in time_dict) and 'time0' not in time_dict:
raise ValueError(f"{source_type} {source_id}: 'pre' and 'post' cannot be provided without 'time0'")
Expand Down
2 changes: 1 addition & 1 deletion sdk/atriumdb/windowing/light_mapped_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def _get_window(self, source_info, window_start_time, window_end_time):
# Create Window object
window = Window(
signals=signals,
start_time=window_start_time,
start_time=int(window_start_time),
device_id=device_id,
patient_id=patient_id,
label_time_series=label_time_series,
Expand Down
31 changes: 22 additions & 9 deletions sdk/atriumdb/windowing/verify_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,14 @@ def _validate_sources(definition, sdk, validated_measure_list, gap_tolerance=Non

def _get_validated_entries(time_specs, validated_measures, sdk, device_id=None, patient_id=None, gap_tolerance=None,
start_time_n=None, end_time_n=None):
gap_tolerance = 60 * 60 * 1_000_000_000 if gap_tolerance is None else gap_tolerance # 1 hour nano default
gap_tolerance = 60 * 60 * 1_000_000_000 if gap_tolerance is None else gap_tolerance

union_intervals = intervals_union_list(
[sdk.get_interval_array(
measure_info['id'], device_id=device_id, patient_id=patient_id, gap_tolerance_nano=gap_tolerance,
start=start_time_n, end=end_time_n)
for measure_info in validated_measures])
union_intervals = intervals_union_list([
sdk.get_interval_array(
measure_info['id'], device_id=device_id, patient_id=patient_id,
gap_tolerance_nano=gap_tolerance, start=start_time_n, end=end_time_n)
for measure_info in validated_measures
])

merged_union_intervals = []
for start, end in union_intervals:
Expand All @@ -224,11 +225,24 @@ def _get_validated_entries(time_specs, validated_measures, sdk, device_id=None,
f"time regions for the specified measures. Skipping")
return None

# Apply global bounds to ALL cases, including "all"
if time_specs == "all":
return union_intervals.tolist()
# Apply global start/end time constraints to union_intervals
constrained_intervals = []
for start, end in union_intervals:
# Constrain each interval to global bounds
if start_time_n is not None:
start = max(start, start_time_n)
if end_time_n is not None:
end = min(end, end_time_n)

interval_list = []
# Only include if the interval is still valid after constraining
if start < end:
constrained_intervals.append([start, end])

return constrained_intervals

interval_list = []
for region_data in time_specs:
if 'time0' in region_data:
start, end = region_data['time0'] - region_data['pre'], region_data['time0'] + region_data['post']
Expand All @@ -251,7 +265,6 @@ def _get_validated_entries(time_specs, validated_measures, sdk, device_id=None,

return interval_list


def compute_hash(data):
"""Compute a SHA256 hash of the given data."""
data_string = json.dumps(data, sort_keys=True)
Expand Down
2 changes: 1 addition & 1 deletion sdk/atriumdb/windowing/windowing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def get_window_list(device_id, patient_id, validated_measure_list, source_batch_

result_window = Window(
signals=signal_dictionary,
start_time=window_start_time,
start_time=int(window_start_time),
device_id=device_id,
patient_id=patient_id,
label_time_series=label_time_series,
Expand Down
2 changes: 1 addition & 1 deletion sdk/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
project = 'AtriumDB'
copyright = '2024, The Hospital for Sick Children'
author = 'LaussenLabs'
release = '2.5.0'
release = '2.5.1'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
77 changes: 44 additions & 33 deletions sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ mypkg = ["*.so", "*.dll"]

[project]
name = "atriumdb"
version = "2.5.0"
version = "2.5.1"
description = "Timeseries Database"
readme = "README.md"
authors = [{name = "Robert Greer, William Dixon, Spencer Vecile"}, { name = "Robert Greer", email = "robert.greer@sickkids.ca"}, { name = "William Dixon", email = "will.dixon@sickkids.ca" }, { name = "Spencer Vecile", email = "spencer.vecile@sickkids.ca"}]
Expand All @@ -37,55 +37,66 @@ classifiers = [
keywords = ["atriumdb", "timeseries", "database", "waveform", "medical data", "machine learning", "data", "data science"]
dependencies = [
'tomli; python_version < "3.11"',
"numpy >= 1.21.4, < 2",
"PyYAML >= 6.0",
"tqdm >= 4.65.0, < 5",
"numpy >= 2.2.6, < 3",
"PyYAML >= 6.0.2",
"tqdm >= 4.67.1, < 5",
]

[project.optional-dependencies]
mariadb = [
"mariadb == 1.1.10",
"mariadb == 1.1.13",
]
remote = [
"requests >= 2.28.2, < 3",
"PyJWT[crypto] >= 2.8.0, < 3",
"python-dotenv >= 0.21, < 1",
"websockets >= 12.0, < 13",
"requests >= 2.32.4, < 3",
"PyJWT[crypto] >= 2.6.0, < 3",
"python-dotenv >= 0.21.1, < 1",
"websockets >= 15.0.1, < 16",
]
cli = [
"requests >= 2.28.2, < 3",
"requests >= 2.32.4, < 3",
"qrcodeT >= 1.0.4, < 2",
"click >= 8.1.3, < 9",
"pandas >= 1.5, < 2",
"click >= 8.2.1, < 9",
"pandas >= 2.3.1, < 3",
"tabulate >= 0.9.0, < 1",
"fastparquet == 2023.2.0",
"python-dotenv >= 0.21, < 1",
"PyYAML >= 6.0"
"fastparquet == 2024.11.0",
"python-dotenv >= 0.21.1, < 1",
"PyYAML >= 6.0.2"
]
all = [
"mariadb == 1.1.10",
"requests >= 2.28.2, < 3",
"PyJWT[crypto] >= 2.8.0, < 3",
"websockets >= 12.0, < 13",
"mariadb == 1.1.13",
"requests >= 2.32.4, < 3",
"PyJWT[crypto] >= 2.6.0, < 3",
"websockets >= 15.0.1, < 16",
"qrcodeT >= 1.0.4, < 2",
"python-dotenv >= 0.21, < 1",
"click >= 8.1.3, < 9",
"pandas >= 1.5, < 2",
"python-dotenv >= 0.21.1, < 1",
"click >= 8.2.1, < 9",
"pandas >= 2.3.1, < 3",
"tabulate >= 0.9.0, < 1",
"fastparquet >= 2023.2.0, < 2024",
"wfdb >= 4.1.0, < 5",
"pyarrow >= 16.0.0, < 17",
"tzdata >= 2024.1, < 2025"
"fastparquet >= 2024.11.0, < 2025",
"wfdb >= 4.2.0, < 5",
"pyarrow >= 21.0.0, < 22",
"tzdata >= 2025.2, < 2026"
]

[dev-dependencies]
testing = [
"wfdb >= 4.1.0, < 5",
"mariadb == 1.1.13",
"requests >= 2.32.4, < 3",
"PyJWT[crypto] >= 2.6.0, < 3",
"websockets >= 15.0.1, < 16",
"qrcodeT >= 1.0.4, < 2",
"python-dotenv >= 0.21.1, < 1",
"click >= 8.2.1, < 9",
"pandas >= 2.3.1, < 3",
"tabulate >= 0.9.0, < 1",
"fastparquet >= 2024.11.0, < 2025",
"wfdb >= 4.2.0, < 5",
"pyarrow >= 21.0.0, < 22",
"tzdata >= 2025.2, < 2026",
"names >= 0.3.0, < 1",
"uvicorn >= 0.27.0, < 1",
"pytest"
"uvicorn >= 0.35.0, < 1",
"pytest >= 7.2.1",
"fastapi >= 0.95.0, < 1"
]

[dev-dependencies]
requires-python = ">=3.10"

[tool.pytest.ini_options]
Expand All @@ -100,4 +111,4 @@ Documentation = "https://docs.atriumdb.io/"

[project.scripts]
hello = "atriumdb.cli.hello:hello"
atriumdb = "atriumdb.cli.atriumdb_cli:cli"
atriumdb = "atriumdb.cli.atriumdb_cli:cli"
33 changes: 16 additions & 17 deletions sdk/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
auth0_python==4.2.0
click==8.1.3
fastapi==0.95.2
uvicorn==0.27.0
importlib_resources==5.12.0
mariadb==1.1.6
auth0_python==4.1.0
click==8.2.1
fastapi==0.95.0
uvicorn==0.35.0
mariadb==1.1.13
names==0.3.0
numpy==1.21.6
pandas==1.5.3
numpy==2.2.6
pandas==2.3.1
pytest==7.2.1
python-decouple==3.8
python-dotenv==1.0.0
PyYAML==6.0
python-dotenv==0.21.1
PyYAML==6.0.2
qrcodeT==1.0.4
requests==2.28.2
setuptools==65.5.0
requests==2.32.4
setuptools==65.6.3
tabulate==0.9.0
tomli==2.0.1
tqdm==4.65.0
tqdm==4.67.1
urllib3==1.26.14
wfdb==4.1.1
PyJWT[crypto]~=2.8.0
websockets ~= 12.0
pyarrow~=16.0.0
wfdb==4.2.0
PyJWT[crypto]~=2.6.0
websockets ~= 15.0.1
pyarrow~=21.0.0
16 changes: 8 additions & 8 deletions sdk/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
numpy==1.21.6
requests==2.28.2
mariadb==1.1.6
numpy==2.2.6
requests==2.32.4
mariadb==1.1.13
tomli==2.0.1
click>=8.1.3
click>=8.2.1
qrcodeT==1.0.4
pandas==1.5.3
PyYAML==6.0
PyJWT[crypto]~=2.8.0
websockets ~= 12.0
pandas==2.3.1
PyYAML==6.0.2
PyJWT[crypto]~=2.6.0
websockets ~= 15.0.1
6 changes: 3 additions & 3 deletions sdk/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

def test_api():
def start_server():
uvicorn.run(app)
uvicorn.run(app, port=8123)

# start server in daemon thread so it exits when complete
api_thread = threading.Thread(target=start_server, daemon=True)
Expand All @@ -53,7 +53,7 @@ def _test_api(db_type, dataset_location, connection_params):
app.dependency_overrides[get_sdk_instance] = lambda: sdk

# set up remote mode sdk to connect to the api
api_sdk = AtriumSDK(metadata_connection_type="api", api_url="http://127.0.0.1:8000", validate_token=False)
api_sdk = AtriumSDK(metadata_connection_type="api", api_url="http://127.0.0.1:8123", validate_token=False)
# change the sdk token expiry so the test can work
api_sdk.token_expiry = time.time() + 1_000_000

Expand All @@ -74,7 +74,7 @@ def _test_api_labels(db_type, dataset_location, connection_params):
app.dependency_overrides[get_sdk_instance] = lambda: sdk

# set up remote mode sdk to connect to the api
api_sdk = AtriumSDK(metadata_connection_type="api", api_url="http://127.0.0.1:8000", validate_token=False)
api_sdk = AtriumSDK(metadata_connection_type="api", api_url="http://127.0.0.1:8123", validate_token=False)
# change the sdk token expiry so the test can work
api_sdk.token_expiry = time.time() + 1_000_000

Expand Down
2 changes: 1 addition & 1 deletion sdk/tests/test_block_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@


def test_block_select():
maria_handler = MariaDBHandler(host, user, password, DB_NAME)
maria_handler = MariaDBHandler(host, user, password, DB_NAME, port)
maria_handler.maria_connect_no_db().cursor().execute(f"DROP DATABASE IF EXISTS {DB_NAME}")
maria_handler.create_schema()
_test_block_select(maria_handler)
Expand Down
6 changes: 0 additions & 6 deletions sdk/tests/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,8 @@
[
("error1.yaml", ValueError, None, "Unexpected key: patient_id"),
("error2.yaml", ValueError, None, "Patient ID John must be an integer"),
("error3.yaml", None, UserWarning, "patient_id 12345: The epoch for start: 1659344515 looks "
"like it's formatted in seconds. However start will be "
"interpreted as nanosecond data."),
("error4.yaml", ValueError, None, "Invalid time key: en. Allowed keys are: "
"start, end, time0, pre, post"),
("error5.yaml", None, UserWarning, "patient_id 12345: The epoch for pre: 60 looks like it's "
"formatted in seconds. However pre will be interpreted "
"as nanosecond data."),
("error6.yaml", ValueError, None, "patient_id 12345: start time 1682739300000000000 must be "
"less than end time 1682739300000000000"),
("error7.yaml", ValueError, None, "pre cannot be negative"),
Expand Down
1 change: 1 addition & 0 deletions sdk/tests/test_definition_start_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def _test_transfer_start_end(db_type, dataset_location, connection_params):
assert has_windows_outside_middle_third, "Sanity Check Failed"

# Now use global start-end and confirm that all the windows are inside the bounds
definition = DatasetDefinition(measures=["measure"], device_ids={device_id: "all"})

for window in sdk_1.get_iterator(definition, window_duration_nano, window_slide_nano, time_units="ns",
start_time=third_time, end_time=second_third_time):
Expand Down
Loading