diff --git a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py index c913195e..e46262a9 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py @@ -128,6 +128,29 @@ def _get_descendants(doc, transformation_resources): raise +def _get_parents(doc, transformation_resources): + parents_url = transformation_resources.get( + 'parents_url') + token = transformation_resources.get('token') + uuid = doc.get('uuid') + + try: + response = requests.get( + f'{parents_url}/{uuid}', headers={'Authorization': f'Bearer {token}'}) + response.raise_for_status() + return response.json() + except requests.exceptions.HTTPError as e: + logger.error(e.response.text) + raise + + +# Returns an array of all parent datasets, or an empty array if there are no parent datasets. HTTP errors from the parents endpoint are not swallowed: _get_parents logs the response text and re-raises them. +def _get_parent_datasets(doc, transformation_resources): + parents = _get_parents(doc, transformation_resources) + datasets = [parent for parent in parents if parent.get('entity_type') == 'Dataset'] + return datasets + + def _add_pipeline(doc, assay_details): if pipeline := assay_details.get('pipeline-shorthand'): doc['pipeline'] = pipeline @@ -152,6 +175,18 @@ def _set_soft_assaytype(doc, assay_details): def add_assay_details(doc, transformation_resources): + """ + For datasets, add assay details and derived fields to the document, including: + - dataset categories (raw vs processed, single vs multi-assay) + - pipeline + - soft assay type (e.g. scRNA-seq, CODEX, etc.) and assay display name + + These are added to the passed `doc` in-place. + + Then, determine if the dataset is visualizable by portal-visualization based on its assay details and those of its descendants and parents. + + Non-dataset entities do not have assay details and are skipped. 
+ """ if 'dataset_type' in doc: assay_details = _get_assay_details(doc, transformation_resources) @@ -174,9 +209,13 @@ def get_assay_type_for_viz(doc): return # Check if the main entity can be visualized by portal-visualization. - has_viz = has_visualization(doc, get_assay_type_for_viz) - doc['visualization'] = has_viz - if not has_viz: + doc['visualization'] = has_visualization(doc, get_assay_type_for_viz) + + # Set the 'spatial' field for search based off of the presence of the 'spatial' hint + if ('spatial' in assay_details.get('vitessce-hints', [])): + doc['spatial'] = True + + if not doc['visualization']: # If an entity doesn't have a visualization, # check its descendants for a supporting image pyramid. parent_uuid = doc.get('uuid') @@ -190,19 +229,31 @@ def get_assay_type_for_descendants(descendant): uuid = descendant return _get_assay_details_by_uuid(uuid, transformation_resources) - # Filter any unpublished/non-QA descendants + # Filter any unpublished/non-QA descendants and multi-assay splits descendants = [descendant for descendant in descendants if [ - 'Published', 'QA'].count(descendant.get('status')) > 0] + 'Published', 'QA'].count(descendant.get('status')) > 0 and descendant.get('creation_action') != CreationAction.MULTI_ASSAY_SPLIT] # Sort by the descendant's last modified timestamp, descending descendants.sort( key=lambda x: x['last_modified_timestamp'], reverse=True) # If any remaining descendants have visualization data, set the parent's visualization to True for descendant in descendants: + # Even though the descendant doc gets dropped, the soft assay information is necessary for portal-visualization. 
soft_assay_info = get_assay_type_for_descendants(descendant) - _set_soft_assaytype(descendant, soft_assay_info) - if has_visualization(descendant, get_assay_type_for_descendants, parent_uuid): doc['visualization'] = True - descendant['visualization'] = True + return + + # If it's still not visualizable, check if it requires a parent dataset to be visualizable + # (e.g. for image pyramids and segmentation masks) + if not doc['visualization']: + parent_datasets = _get_parent_datasets(doc, transformation_resources) + for parent in parent_datasets: + parent_assay_info = get_assay_type_for_viz(parent) + + _set_soft_assaytype(parent, parent_assay_info) + + if has_visualization(parent, get_assay_type_for_viz): + doc['visualization'] = True + break diff --git a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py index b9660958..ddcd1d28 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py @@ -5,10 +5,11 @@ _add_dataset_categories ) -transformation_resources = { +mock_transformation_resources = { 'ingest_api_soft_assay_url': 'abc123', + 'token': 'def456', 'descendants_url': 'ghi789', - 'token': 'def456' + 'parents_url': 'jkl012' } @@ -30,6 +31,10 @@ def mock_empty_descendants(): return mock_response([]) +def mock_empty_parents(): + return mock_response([]) + + def mock_raw_soft_assay(uuid=None, headers=None): return mock_response({ "assaytype": "sciRNAseq", @@ -45,7 +50,8 @@ def mock_raw_soft_assay(uuid=None, headers=None): def test_raw_dataset_type(mocker): mocker.patch('requests.get', side_effect=[ mock_raw_soft_assay(), - mock_empty_descendants()]) + mock_empty_descendants(), + mock_empty_parents()]) input_raw_doc = { 'uuid': '421007293469db7b528ce6478c00348d', 'dataset_type': 'RNAseq', @@ -67,7 +73,7 @@ def 
test_raw_dataset_type(mocker): 'processing': 'raw', 'soft_assaytype': 'sciRNAseq' } - add_assay_details(input_raw_doc, transformation_resources) + add_assay_details(input_raw_doc, mock_transformation_resources) assert input_raw_doc == expected_raw_output_doc @@ -115,7 +121,58 @@ def test_processed_dataset_type(mocker): ], 'visualization': True, } - add_assay_details(input_processed_doc, transformation_resources) + add_assay_details(input_processed_doc, mock_transformation_resources) + assert input_processed_doc == output_processed_doc + + +def mock_spatial_soft_assay(uuid=None, headers=None): + return mock_response({ + "assaytype": "salmon_rnaseq_sciseq", + "contains-pii": True, + "pipeline-shorthand": "Salmon", + "description": "sciRNA-seq [Salmon]", + "primary": False, + "vitessce-hints": [ + "is_sc", + "rna", + "spatial" + ] + }) + + +def test_spatial_dataset_type(mocker): + mocker.patch('requests.get', side_effect=[ + mock_spatial_soft_assay(), + mock_empty_descendants()]) + input_processed_doc = { + 'uuid': '22684b9011fc5aea5cb3f89670a461e8', + 'dataset_type': 'RNAseq [Salmon]', + 'entity_type': 'Dataset', + 'creation_action': 'Central Process' + } + + output_processed_doc = { + 'assay_display_name': ['sciRNA-seq [Salmon]'], + 'dataset_type': 'RNAseq [Salmon]', + 'entity_type': 'Dataset', + 'mapped_data_types': ['sciRNA-seq [Salmon]'], + 'pipeline': 'Salmon', + 'raw_dataset_type': 'RNAseq', + 'assay_modality': 'single', + 'creation_action': 'Central Process', + 'processing': 'processed', + 'processing_type': 'hubmap', + 'uuid': '22684b9011fc5aea5cb3f89670a461e8', + 'soft_assaytype': 'salmon_rnaseq_sciseq', + 'vitessce-hints': [ + "is_sc", + "rna", + "spatial" + ], + 'visualization': True, + 'spatial': True, + } + add_assay_details(input_processed_doc, mock_transformation_resources) assert input_processed_doc == output_processed_doc @@ -126,7 +183,8 @@ def mock_empty_soft_assay(uuid=None, headers=None): def test_transform_unknown_assay(mocker): 
mocker.patch('requests.get', side_effect=[ mock_empty_soft_assay(), - mock_empty_descendants()]) + mock_empty_descendants(), + mock_empty_parents()]) unknown_assay_input_doc = { 'uuid': '69c70762689b20308bb049ac49653342', @@ -152,7 +210,7 @@ def test_transform_unknown_assay(mocker): 'visualization': False, 'entity_type': 'Dataset', } - add_assay_details(unknown_assay_input_doc, transformation_resources) + add_assay_details(unknown_assay_input_doc, mock_transformation_resources) assert unknown_assay_input_doc == unknown_assay_output_doc @@ -244,7 +302,7 @@ def test_transform_image_pyramid_parent(mocker): 'entity_type': 'Dataset', } - add_assay_details(image_pyramid_input_doc, transformation_resources) + add_assay_details(image_pyramid_input_doc, mock_transformation_resources) assert image_pyramid_input_doc == image_pyramid_output_doc @@ -252,6 +310,7 @@ def test_transform_image_pyramid_support(mocker): mocker.patch('requests.get', side_effect=[ mock_image_pyramid_support(), mock_empty_descendants(), + mock_empty_parents(), ]) image_pyramid_input_doc = { 'uuid': '0bf9cb40adebcfb261dfbe9244607508', @@ -282,7 +341,7 @@ def test_transform_image_pyramid_support(mocker): 'entity_type': 'Dataset', } - add_assay_details(image_pyramid_input_doc, transformation_resources) + add_assay_details(image_pyramid_input_doc, mock_transformation_resources) assert image_pyramid_input_doc == image_pyramid_output_doc @@ -296,15 +355,26 @@ def mock_epic(uuid=None, headers=None): "vitessce-hints": [ "segmentation_mask", "is_image", - "pyramid" + "pyramid", + "epic" ] }) +def mock_epic_parents(): + return mock_response([ + { + "uuid": "parent_dataset_uuid_001", + "entity_type": "Dataset", + } + ]) + + def test_transform_epic(mocker): mocker.patch('requests.get', side_effect=[ mock_epic(), mock_empty_descendants(), + mock_epic_parents(), ]) epic_input_doc = { 'uuid': 'abc123', @@ -328,13 +398,13 @@ def test_transform_epic(mocker): "segmentation_mask", "is_image", "pyramid", - + "epic", ], - 
'visualization': False, + 'visualization': True, 'entity_type': 'Dataset', } - add_assay_details(epic_input_doc, transformation_resources) + add_assay_details(epic_input_doc, mock_transformation_resources) assert epic_input_doc == epic_output_doc diff --git a/src/hubmap_translator.py b/src/hubmap_translator.py index ac2b0976..8e99f208 100644 --- a/src/hubmap_translator.py +++ b/src/hubmap_translator.py @@ -169,6 +169,7 @@ def __init__(self, indices, app_client_id, app_client_secret, token, ontology_ap self.transformation_resources = {'ingest_api_soft_assay_url': self.ingest_api_soft_assay_url, 'organ_map': self.get_organ_types(), 'descendants_url': f'{self.entity_api_url}/descendants', + 'parents_url': f'{self.entity_api_url}/parents', 'token': token,} diff --git a/src/requirements.txt b/src/requirements.txt index bb628f50..c6db3cdd 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,17 +1,17 @@ -Flask==3.0.3 +Flask==3.1.3 jsonschema==4.25.1 # The commons package requires requests>=2.22.0 and PyYAML>=5.3.1 requests==2.32.5 PyYAML==6.0.3 -portal-visualization==0.4.22 +portal-visualization==0.5.1 # Use the published package from PyPI as default # Use the branch name of commons from github for testing new changes made in commons from different branch # Default is main branch specified in search-api's docker-compose.development.yml if not set # git+https://github.com/hubmapconsortium/commons.git@${COMMONS_BRANCH}#egg=hubmap-commons -hubmap-commons==2.1.22 +hubmap-commons==2.1.23 atlas-consortia-jobq>=0.1.0 # The use of `-r` lets us specify the transitive requirements in one place -r search-adaptor/src/requirements.txt diff --git a/src/search-adaptor b/src/search-adaptor index 386a117d..5aae76e2 160000 --- a/src/search-adaptor +++ b/src/search-adaptor @@ -1 +1 @@ -Subproject commit 386a117dc361d3aae55004a07e6d5b101a6ecc92 +Subproject commit 5aae76e2a50039ed8592877a98d880389925ca90