From 9284d2fc0dfc361e86ec8e95459fb659fb1bf10e Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 25 Sep 2025 20:19:14 -0400 Subject: [PATCH 1/5] modified the after update triggers to link datasets and samples to their direct ancestors in a way that no longer recreate the activity and all linkeages. Because the existing implementation involved using query functions that deleted everything, it became necessary to make new query functions to accommodate this change, and now we have a solution that handles each ancestory individual, either deleting or adding --- src/schema/schema_neo4j_queries.py | 110 +++++++++++++++++++++++++++++ src/schema/schema_triggers.py | 59 +++++++++------- 2 files changed, 142 insertions(+), 27 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 80b68be5..f2a9c0d7 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -718,6 +718,70 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u tx.rollback() raise TransactionError(msg) + + +""" +Create linkages from new direct ancestors to an EXISTING activity node in neo4j. + + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +entity_uuid : str + The uuid of target child entity +new_ancestor_uuid : str + The uuid of new direct ancestor to be linked +activity_uuid : str + The uuid of the existing activity node to link to +""" +def add_new_ancestors_to_existing_activity(neo4j_driver, new_ancestor_uuids, activity_uuid): + try: + with neo4j_driver.session() as session: + tx = session.begin_transaction() + + create_outgoing_activity_relationships_tx(tx=tx + , source_node_uuids=new_ancestor_uuids + , activity_node_uuid=activity_uuid) + + tx.commit() + except TransactionError as te: + msg = "TransactionError from calling add_new_ancestors_to_existing_activity(): " + logger.exception(msg) + + if tx.closed() == False: + logger.error("Failed to commit add_new_ancestors_to_existing_activity() transaction, rollback") + tx.rollback() + + raise TransactionError(msg) + +""" +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +entity_uuid : str + The uuid of the target entity nodeget_paren + +Returns +------- +str + The uuid of the direct ancestor Activity node +""" +def get_parent_activity_uuid_from_entity(neo4j_driver, entity_uuid): + query = """ + MATCH (activity:Activity)-[:ACTIVITY_OUTPUT]->(entity:Entity {uuid: $entity_uuid}) + RETURN activity.uuid AS activity_uuid + """ + + with neo4j_driver.session() as session: + result = session.run(query, entity_uuid=entity_uuid) + + record = result.single() + if record: + return record["activity_uuid"] + else: + return None """ @@ -1821,6 +1885,52 @@ def _delete_activity_node_and_linkages_tx(tx, uuid): result = tx.run(query) +""" +Delete only the ACTIVITY_INPUT linkages between a target entity and a specific set of its direct ancestors. +The Activity node and the entity nodes remain intact. + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +entity_uuid : str + The uuid of the target child entity +ancestor_uuids : list + A list of uuids of ancestors whose relationships should be deleted +""" +def delete_ancestor_linkages_tx(neo4j_driver, entity_uuid, ancestor_uuids): + query = ( + "MATCH (a:Entity)-[r:ACTIVITY_INPUT]->(activity:Activity)-[:ACTIVITY_OUTPUT]->(t:Entity {uuid: $entity_uuid}) " + "WHERE a.uuid IN $ancestor_uuids " + "DELETE r" + ) + + logger.info("======delete_ancestor_linkages_tx() query======") + logger.debug(query) + + try: + with neo4j_driver.session() as session: + tx = session.begin_transaction() + + result = tx.run( + query, + entity_uuid=entity_uuid, + ancestor_uuids=ancestor_uuids + ) + + + tx.commit() + + except TransactionError as te: + msg = "TransactionError from calling delete_ancestor_linkages_tx(): " + logger.exception(msg) + + if tx.closed() == False: + logger.error("Failed to commit delete_ancestor_linkages_tx() transaction, rollback") + tx.rollback() + + raise TransactionError(msg) + """ Delete linkages between a publication and its associated collection diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index c5d83de1..8c276540 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -788,20 +788,24 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, user_token, dataset_uuid = existing_data_dict['uuid'] direct_ancestor_uuids = existing_data_dict['direct_ancestor_uuids'] - # Generate property values for Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_type, user_token, existing_data_dict) - - try: - # Create a linkage (via one Activity node) between the dataset node and its direct ancestors in neo4j - schema_neo4j_queries.link_entity_to_direct_ancestors(schema_manager.get_neo4j_driver_instance(), dataset_uuid, direct_ancestor_uuids, activity_data_dict) + existing_dataset_ancestor_uuids = schema_neo4j_queries.get_dataset_direct_ancestors(schema_manager.get_neo4j_driver_instance(), dataset_uuid, "uuid") + new_ancestors = set(direct_ancestor_uuids)-set(existing_dataset_ancestor_uuids) + ancestors_to_unlink = set(existing_dataset_ancestor_uuids)-set(direct_ancestor_uuids) + activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid) + if new_ancestors: + + try: + schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid) + except TransactionError: + raise - # Delete the cache of this dataset if any cache exists - # Because the `Dataset.direct_ancestors` field - schema_manager.delete_memcached_cache([dataset_uuid]) - except TransactionError: - # No need to log - raise + if ancestors_to_unlink: + try: + schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), dataset_uuid, list(ancestors_to_unlink)) + except TransactionError: + raise + """ Trigger event method for creating or recreating linkages between this new Collection and the Datasets it contains @@ -1653,10 +1657,10 @@ def update_status(property_key, normalized_type, user_token, existing_data_dict, """ def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: - raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'update_status()' trigger method.") uuid = existing_data_dict['uuid'] if 'status' not in existing_data_dict: - raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'update_status()' trigger method.") status = existing_data_dict['status'] if status.lower() != "published": children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') @@ -1834,21 +1838,22 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, user_token, ex # Build a list of direct ancestor uuids # Only one uuid in the list in this case direct_ancestor_uuids = [existing_data_dict['direct_ancestor_uuid']] + existing_sample_ancestor_uuids = schema_neo4j_queries.get_sample_direct_ancestor(schema_manager.get_neo4j_driver_instance(), sample_uuid, "uuid") + new_ancestors = set(direct_ancestor_uuids)-set(existing_sample_ancestor_uuids) + ancestors_to_unlink = set(existing_sample_ancestor_uuids)-set(direct_ancestor_uuids) + activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), sample_uuid) + if new_ancestors: - # Generate property values for Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_type, user_token, existing_data_dict) - - try: - # Create a linkage (via Activity node) - # between the Sample node and the source entity node in neo4j - schema_neo4j_queries.link_entity_to_direct_ancestors(schema_manager.get_neo4j_driver_instance(), sample_uuid, direct_ancestor_uuids, activity_data_dict) + try: + schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid) + except TransactionError: + raise - # Delete the cache of sample if any cache exists - # Because the `Sample.direct_ancestor` field can be updated - schema_manager.delete_memcached_cache([sample_uuid]) - except TransactionError: - # No need to log - raise + if ancestors_to_unlink: + try: + schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), sample_uuid, list(ancestors_to_unlink)) + except TransactionError: + raise """ Trigger event method of creating or recreating linkages between this new publication and its associated_collection From bcb7dd1ebb84affaad01a84d6fa0741fe0274391 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Wed, 1 Oct 2025 17:21:22 -0400 Subject: [PATCH 2/5] fixed a bug introduced with the new reworked triggers for linking direct ancestors that caused activities to not be created on a POST --- src/schema/schema_neo4j_queries.py | 6 ++++-- src/schema/schema_triggers.py | 22 ++++++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 443f58eb..292d250d 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -766,11 +766,13 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u activity_uuid : str The uuid of the existing activity node to link to """ -def add_new_ancestors_to_existing_activity(neo4j_driver, new_ancestor_uuids, activity_uuid): +def add_new_ancestors_to_existing_activity(neo4j_driver, new_ancestor_uuids, activity_uuid, create_activity, activity_data_dict, dataset_uuid): try: with neo4j_driver.session() as session: tx = session.begin_transaction() - + if create_activity: + create_activity_tx(tx, activity_data_dict) + create_relationship_tx(tx, activity_uuid, dataset_uuid, 'ACTIVITY_OUTPUT', '->') create_outgoing_activity_relationships_tx(tx=tx , source_node_uuids=new_ancestor_uuids , activity_node_uuid=activity_uuid) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 08817c9a..f18f1060 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -892,7 +892,8 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use if 'direct_ancestor_uuids' not in new_data_dict: raise KeyError("Missing 'direct_ancestor_uuids' key in 'new_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") - + create_activity = False + activity_data_dict = {} dataset_uuid = existing_data_dict['uuid'] direct_ancestor_uuids = new_data_dict['direct_ancestor_uuids'] @@ -900,10 +901,16 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use new_ancestors = set(direct_ancestor_uuids)-set(existing_dataset_ancestor_uuids) ancestors_to_unlink = set(existing_dataset_ancestor_uuids)-set(direct_ancestor_uuids) activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid) + + if not activity_uuid: + activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) + activity_uuid = activity_data_dict['uuid'] + create_activity = True + if new_ancestors: try: - schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid) + schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid, create_activity, activity_data_dict, dataset_uuid) except TransactionError: raise @@ -1911,7 +1918,8 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ if 'direct_ancestor_uuid' not in new_data_dict: raise KeyError("Missing 'direct_ancestor_uuid' key in 'new_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") - + create_activity = False + activity_data_dict = {} sample_uuid = existing_data_dict['uuid'] # Build a list of direct ancestor uuids @@ -1921,10 +1929,16 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ new_ancestors = set(direct_ancestor_uuids)-set(existing_sample_ancestor_uuids) ancestors_to_unlink = set(existing_sample_ancestor_uuids)-set(direct_ancestor_uuids) activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), sample_uuid) + + if not activity_uuid: + activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) + activity_uuid = activity_data_dict['uuid'] + create_activity = True + if new_ancestors: try: - schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid) + schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid, create_activity, activity_data_dict, sample_uuid) except TransactionError: raise From 194033ae0c08f9b3659bc95e379dc8d919819148 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 2 Oct 2025 13:48:06 -0400 Subject: [PATCH 3/5] removed 2 outdated comments about linking to ancestors --- src/schema/provenance_schema.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 11f37911..fb0debe1 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -462,7 +462,6 @@ ENTITIES: exposed: false indexed: false description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages." - # Note: link_dataset_to_direct_ancestors() will always delete all the old linkages first after_create_trigger: link_dataset_to_direct_ancestors after_update_trigger: link_dataset_to_direct_ancestors direct_ancestors: @@ -1034,7 +1033,6 @@ ENTITIES: exposed: false indexed: false description: "The uuid of source entity from which this new entity is derived from. Used on creation or edit to create an action and relationship to the ancestor. The direct ancestor must be a Donor or Sample. If the direct ancestor is a Donor, the sample must be of type organ." - # Note: link_sample_to_direct_ancestor() will always delete all the old linkages first after_create_trigger: link_sample_to_direct_ancestor after_update_trigger: link_sample_to_direct_ancestor before_property_update_validators: From f447d7d0b5bc7afe975816a4656d70f6b5055c43 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 3 Oct 2025 04:01:08 -0400 Subject: [PATCH 4/5] added additional logging to the link to direct ancestor triggers to show ancestors linked, unliked, or if there are none. Also rewrote link_sample_to_direct_ancestored to better handling ancestors as lists or strings --- src/schema/schema_triggers.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index f18f1060..5da56545 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -907,18 +907,22 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use activity_uuid = activity_data_dict['uuid'] create_activity = True - if new_ancestors: - + if new_ancestors: + logger.info(f"Linking the following new ancestors: {', '.join(new_ancestors)}") try: schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid, create_activity, activity_data_dict, dataset_uuid) except TransactionError: raise if ancestors_to_unlink: + logger.info(f"Unlinking the following ancestors: {', '.join(ancestors_to_unlink)}") try: schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), dataset_uuid, list(ancestors_to_unlink)) except TransactionError: raise + + if not(ancestors_to_unlink or new_ancestors): + logger.info("No new ancestors linked, nor old ancestors unlinked") """ @@ -1921,13 +1925,19 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ create_activity = False activity_data_dict = {} sample_uuid = existing_data_dict['uuid'] + new_ancestors = None + ancestors_to_unlink = None # Build a list of direct ancestor uuids # Only one uuid in the list in this case direct_ancestor_uuids = [new_data_dict['direct_ancestor_uuid']] existing_sample_ancestor_uuids = schema_neo4j_queries.get_sample_direct_ancestor(schema_manager.get_neo4j_driver_instance(), sample_uuid, "uuid") - new_ancestors = set(direct_ancestor_uuids)-set(existing_sample_ancestor_uuids) - ancestors_to_unlink = set(existing_sample_ancestor_uuids)-set(direct_ancestor_uuids) + if isinstance(existing_sample_ancestor_uuids, list): + existing_sample_ancestor_uuids = existing_sample_ancestor_uuids[0] + if existing_sample_ancestor_uuids and existing_sample_ancestor_uuids != direct_ancestor_uuids: + new_ancestors = direct_ancestor_uuids + if not existing_sample_ancestor_uuids: + new_ancestors = direct_ancestor_uuids activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), sample_uuid) if not activity_uuid: @@ -1936,17 +1946,22 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ create_activity = True if new_ancestors: - + ancestors_to_unlink = existing_sample_ancestor_uuids + logger.info(f"Linking the following new ancestors: {new_ancestors}") try: - schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid, create_activity, activity_data_dict, sample_uuid) + schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), new_ancestors, activity_uuid, create_activity, activity_data_dict, sample_uuid) except TransactionError: raise - + if ancestors_to_unlink: + logger.info(f"Unlinking the following ancestor: {ancestors_to_unlink}") try: - schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), sample_uuid, list(ancestors_to_unlink)) + schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), sample_uuid, [ancestors_to_unlink]) except TransactionError: raise + + if not(ancestors_to_unlink or new_ancestors): + logger.info("No new ancestors linked, nor old ancestors unlinked") """ TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE From c2655be086025469ca94a3ebd43037e8f176cc3b Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Mon, 6 Oct 2025 03:03:35 -0400 Subject: [PATCH 5/5] modified the schema trigger for updating sample direct ancestors. It now gracefully handles all incoming data as a list regardless if its only one value or not, and addresses earlier bugs --- src/schema/schema_triggers.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 5da56545..b83514de 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1919,50 +1919,47 @@ def delete_metadata_files(property_key, normalized_type, request, user_token, ex def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") - if 'direct_ancestor_uuid' not in new_data_dict: raise KeyError("Missing 'direct_ancestor_uuid' key in 'new_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") + create_activity = False activity_data_dict = {} sample_uuid = existing_data_dict['uuid'] new_ancestors = None ancestors_to_unlink = None - - # Build a list of direct ancestor uuids - # Only one uuid in the list in this case - direct_ancestor_uuids = [new_data_dict['direct_ancestor_uuid']] - existing_sample_ancestor_uuids = schema_neo4j_queries.get_sample_direct_ancestor(schema_manager.get_neo4j_driver_instance(), sample_uuid, "uuid") - if isinstance(existing_sample_ancestor_uuids, list): - existing_sample_ancestor_uuids = existing_sample_ancestor_uuids[0] - if existing_sample_ancestor_uuids and existing_sample_ancestor_uuids != direct_ancestor_uuids: - new_ancestors = direct_ancestor_uuids + direct_ancestor_uuids = new_data_dict['direct_ancestor_uuid'] + existing_sample_ancestor_uuids = schema_neo4j_queries.get_parents(schema_manager.get_neo4j_driver_instance(), sample_uuid, "uuid") + if direct_ancestor_uuids not in existing_sample_ancestor_uuids: + new_ancestors = [direct_ancestor_uuids] if not existing_sample_ancestor_uuids: - new_ancestors = direct_ancestor_uuids - activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), sample_uuid) + new_ancestors = [direct_ancestor_uuids] + activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), sample_uuid) if not activity_uuid: activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) activity_uuid = activity_data_dict['uuid'] create_activity = True - if new_ancestors: - ancestors_to_unlink = existing_sample_ancestor_uuids logger.info(f"Linking the following new ancestors: {new_ancestors}") try: schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), new_ancestors, activity_uuid, create_activity, activity_data_dict, sample_uuid) except TransactionError: raise - + ancestors_to_unlink = existing_sample_ancestor_uuids + else: + ancestors_to_unlink = existing_sample_ancestor_uuids + ancestors_to_unlink.remove(direct_ancestor_uuids) if ancestors_to_unlink: logger.info(f"Unlinking the following ancestor: {ancestors_to_unlink}") try: - schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), sample_uuid, [ancestors_to_unlink]) + schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), sample_uuid, ancestors_to_unlink) except TransactionError: raise if not(ancestors_to_unlink or new_ancestors): logger.info("No new ancestors linked, nor old ancestors unlinked") + """ TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE