From 7560e6e6e79209e4e7167d2f54edd192aa24ebee Mon Sep 17 00:00:00 2001 From: Akhil Anto Tharayil Date: Wed, 29 Oct 2025 15:49:02 +0530 Subject: [PATCH 1/4] New mask script commit --- anonymiseScripts/anonymise-standard/README.md | 23 ++++ .../mask_odoo16_sensitive_data.sql | 92 ++++++++++++++++ .../mask_openelis_sensitive_data.sql | 68 ++++++++++++ .../mask_openmrs_sensitive_data.sql | 100 ++++++++++++++++++ 4 files changed, 283 insertions(+) create mode 100644 anonymiseScripts/anonymise-standard/README.md create mode 100644 anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql create mode 100644 anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql create mode 100644 anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql diff --git a/anonymiseScripts/anonymise-standard/README.md b/anonymiseScripts/anonymise-standard/README.md new file mode 100644 index 0000000..ac8b3d6 --- /dev/null +++ b/anonymiseScripts/anonymise-standard/README.md @@ -0,0 +1,23 @@ +# Anonymisation SQL Scripts + +These SQL scripts are designed to **mask or anonymise sensitive data** across different Bahmni modules — **OpenMRS**, **OpenELIS**, and **Odoo 16** — primarily for creating safe, privacy-compliant staging or demo environments. + +## 📌 Overview + +| Script Name | Application | Purpose | +|--------------|--------------|----------| +| `mask_openmrs_sensitive_data.sql` | **OpenMRS** | Masks patient identifiers, names, addresses, phone numbers, and other personally identifiable information (PII) in the OpenMRS database. | +| `mask_openelis_sensitive_data.sql` | **OpenELIS** | Masks patient-related data in the laboratory information system, such as names and identifiers, ensuring test results remain linked but anonymised. | +| `mask_odoo16_sensitive_data.sql` | **Odoo 16** | Masks customer, supplier, and employee details (like names, emails, contact numbers, and addresses) in the Odoo 16 ERP system used in Bahmni. 
| + +--- + +## 🧠 Purpose + +These scripts help organisations using Bahmni to: +- Use **realistic datasets** from production for testing or training. +- Ensure **no personally identifiable data (PII)** is exposed in staging, demo, or shared environments. +- Comply with **data protection and privacy regulations**. + +--- + diff --git a/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql new file mode 100644 index 0000000..73223e3 --- /dev/null +++ b/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql @@ -0,0 +1,92 @@ +\set ON_ERROR_STOP true + +-- =============================== +-- TRUNCATE TABLES +-- =============================== +TRUNCATE res_partner_attributes; +TRUNCATE failed_events; +TRUNCATE mail_message CASCADE; +TRUNCATE mail_followers CASCADE; + +-- =============================== +-- MASK ADDRESSES AND CONTACT DETAILS (all partners; email/phone/mobile are cleared) +-- =============================== +UPDATE res_partner +SET street = concat('address1-', id), + street2 = concat('address2-', id), + city = concat('address3-', id), email = NULL, phone = NULL, mobile = NULL; + +-- =============================== +-- MASK USERS (EXCLUDE admin, emrsync) +-- =============================== +UPDATE res_users +SET login = concat('user-', id) +WHERE login NOT IN ('admin', 'emrsync'); + +-- Set everyone's password as 'password' (hashed) +UPDATE res_users +SET password = '$1$lw8k34ec$xOY5xkPtTgTw/gN6nHiZq.' 
-- hashed password placeholder +WHERE login NOT IN ('admin', 'emrsync'); + +-- =============================== +-- MASK EVENT RECORDS (every customerId occurrence, hence the 'g' flag) +-- =============================== +UPDATE event_records +SET object = regexp_replace(object, '"customerId": "(.+?)"', '"customerId": "XXX123456"', 'g') +WHERE object::text LIKE '%customerId%'; + +-- =============================== +-- MASK MAIL ALIAS +-- =============================== +UPDATE mail_alias +SET alias_name = concat('alias-', id); + +-- =============================== +-- MASK PARTNER NAMES +-- =============================== +UPDATE res_partner +SET name = concat('User ', id); + +-- =============================== +-- REMOVE PARTNER IMAGES (stored in ir_attachment) +-- =============================== +UPDATE ir_attachment +SET db_datas = NULL +WHERE res_model = 'res.partner' AND res_field LIKE 'image%'; + +-- =============================== +-- SELECT QUERIES TO VERIFY CHANGES +-- =============================== + +-- Verify partner addresses and cleared contact details +SELECT id, street, street2, city, email, phone, mobile +FROM res_partner +LIMIT 5; + +-- Verify masked users +SELECT id, login, password +FROM res_users +WHERE login NOT IN ('admin', 'emrsync') +LIMIT 5; + +-- Verify event records masking +SELECT id, object +FROM event_records +WHERE object::text LIKE '%XXX123456%' +LIMIT 5; + +-- Verify mail_alias +SELECT id, alias_name +FROM mail_alias +LIMIT 5; + +-- Verify partner names +SELECT id, name +FROM res_partner +LIMIT 5; + +-- Verify partner images removed +SELECT id, name, res_field, db_datas IS NULL AS image_removed +FROM ir_attachment +WHERE res_model = 'res.partner' AND res_field LIKE 'image%' +LIMIT 5; diff --git a/anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql new file mode 100644 index 0000000..747a831 --- /dev/null +++ b/anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql @@ -0,0 +1,68 @@ +\set ON_ERROR_STOP true + +-- 1️⃣ Clear failed events 
+TRUNCATE failed_events; +-- Verify that the table is empty +SELECT COUNT(*) AS failed_events_count FROM failed_events; + +-- 2️⃣ Mask addresses (organization_address) +UPDATE organization_address AS oa +SET value = CONCAT('address-', oa.organization_id); + +-- Verify the changes in organization_address +SELECT * +FROM organization_address +LIMIT 5; + +-- 3️⃣ Mask patient identities +UPDATE patient_identity AS pi +SET identity_data = CONCAT('PRIMARYRELATIVE-', pi.patient_id); + +-- Verify the changes in patient_identity +SELECT * +FROM patient_identity +LIMIT 5; + +-- 4️⃣ Mask system users login (skip admin and atomfeed users) +UPDATE system_user su +SET login_name = CONCAT('user-', lu.id) +FROM login_user lu +WHERE lu.login_name = su.login_name + AND su.login_name NOT IN ('admin', 'atomfeed'); + +-- Verify the changes in system_user (excluding admin and atomfeed) +SELECT id, login_name +FROM system_user +WHERE login_name NOT IN ('admin', 'atomfeed') LIMIT 5; + +-- 5️⃣ Set everyone's password as adminADMIN! +UPDATE login_user +SET login_name = CONCAT('user-', id), + password = 'adminADMIN!' 
+WHERE login_name NOT IN ('admin', 'atomfeed'); + +-- Verify the changes in login_user +SELECT id, login_name, password +FROM login_user +WHERE login_name NOT IN ('admin', 'atomfeed') LIMIT 5; + +-- 6️⃣ Fix system_user login for users missing in login_user (NOT EXISTS stays correct if login_user.login_name holds a NULL) +UPDATE system_user su +SET login_name = CONCAT('userwologin-', su.id) +WHERE NOT EXISTS (SELECT 1 FROM login_user lu WHERE lu.login_name = su.login_name); + +-- Verify the changes in system_user (missing login_user entries) +SELECT id, login_name +FROM system_user +WHERE login_name LIKE 'userwologin-%' LIMIT 5; + +-- 7️⃣ Set first_name and last_name = login_name (skip atomfeed) +UPDATE system_user +SET first_name = login_name, + last_name = login_name +WHERE login_name NOT IN ('atomfeed'); + +-- Verify the changes in system_user (first_name and last_name) +SELECT id, first_name, last_name +FROM system_user +WHERE first_name = last_name LIMIT 5; diff --git a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql new file mode 100644 index 0000000..b10d87f --- /dev/null +++ b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql @@ -0,0 +1,100 @@ +SET FOREIGN_KEY_CHECKS=0; + +-- 1️⃣ Truncate tables not needed for testing/logs +TRUNCATE TABLE concept_proposal_tag_map; +TRUNCATE TABLE concept_proposal; +TRUNCATE TABLE hl7_in_archive; +TRUNCATE TABLE hl7_in_error; +TRUNCATE TABLE hl7_in_queue; +TRUNCATE TABLE user_property; +TRUNCATE TABLE notification_alert_recipient; +TRUNCATE TABLE notification_alert; +TRUNCATE TABLE failed_events; + +SET FOREIGN_KEY_CHECKS=1; + +-- 2️⃣ Mask person names +UPDATE person_name +SET given_name = CONCAT('Patient', person_id), + middle_name = CONCAT('Patient', person_id), + family_name = CONCAT('Patient', person_id); + +-- Verify the changes in person_name +SELECT person_id, given_name, middle_name, family_name +FROM person_name +WHERE given_name LIKE 'Patient%' LIMIT 5; + +-- 3️⃣ Mask birthdates (simple +/- 90 
days) +UPDATE person +SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*180-90) DAY) +WHERE birthdate IS NOT NULL; + +-- Verify the changes in person birthdate +SELECT person_id, birthdate +FROM person +WHERE birthdate IS NOT NULL LIMIT 5; + +-- 4️⃣ Mask death dates (simple +/- 90 days) +UPDATE person +SET death_date = DATE_ADD(death_date, INTERVAL FLOOR(RAND()*180-90) DAY) +WHERE death_date IS NOT NULL; + +-- Verify the changes in person death_date +SELECT person_id, death_date +FROM person +WHERE death_date IS NOT NULL LIMIT 5; + +-- 5️⃣ Mask addresses +UPDATE person_address +SET address1 = CONCAT('Address1-', person_id), + address2 = CONCAT('Address2-', person_id), + latitude = NULL, + longitude = NULL; + +-- Verify the changes in person_address +SELECT person_id, address1, address2, latitude, longitude +FROM person_address +WHERE address1 LIKE 'Address1-%' LIMIT 5; + +-- 6️⃣ Mask locations +UPDATE location +SET name = CONCAT('Location-', location_id); + +-- Verify the changes in location +SELECT location_id, name +FROM location +WHERE name LIKE 'Location-%' LIMIT 5; + +-- 7️⃣ Mask usernames and passwords +UPDATE users +SET username = CONCAT('username-', user_id), + password = 'dummy_password' +WHERE username NOT IN ('admin', 'superman', 'superuser'); + +-- Verify the changes in users +SELECT user_id, username, password +FROM users +WHERE username NOT IN ('admin', 'superman', 'superuser') LIMIT 5; + +-- 8️⃣ Mask person attributes +UPDATE person_attribute pa +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +SET pa.value = CONCAT(pat.name, '-', pa.person_id) +WHERE pat.name IN ('primaryRelative', 'primaryContact'); + +-- Verify the changes in person_attribute +SELECT pa.person_id, pa.value, pat.name +FROM person_attribute pa +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +WHERE pat.name IN ('primaryRelative', 'primaryContact') LIMIT 5; + +-- 9️⃣ Clear sensitive 
local names +DELETE pa FROM person_attribute pa +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal'); + +-- Verify that the sensitive local names were deleted +SELECT person_id, pa.value, pat.name +FROM person_attribute pa +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal') LIMIT 5; From 8fa6a526fc02172527def73968aae211803d56bb Mon Sep 17 00:00:00 2001 From: Akhil Anto Tharayil Date: Thu, 30 Oct 2025 16:46:54 +0530 Subject: [PATCH 2/4] Added conditions --- anonymiseScripts/anonymise-standard/README.md | 71 +++++-- .../mask_odoo10_sensitive_data.sql | 99 +++++++++ .../mask_odoo16_sensitive_data.sql | 9 +- .../mask_openmrs_sensitive_data.sql | 196 ++++++++++++------ 4 files changed, 304 insertions(+), 71 deletions(-) create mode 100644 anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql diff --git a/anonymiseScripts/anonymise-standard/README.md b/anonymiseScripts/anonymise-standard/README.md index ac8b3d6..4d9bc46 100644 --- a/anonymiseScripts/anonymise-standard/README.md +++ b/anonymiseScripts/anonymise-standard/README.md @@ -1,23 +1,70 @@ -# Anonymisation SQL Scripts -These SQL scripts are designed to **mask or anonymise sensitive data** across different Bahmni modules — **OpenMRS**, **OpenELIS**, and **Odoo 16** — primarily for creating safe, privacy-compliant staging or demo environments. -## 📌 Overview +# SQL Scripts for Data Anonymisation -| Script Name | Application | Purpose | -|--------------|--------------|----------| -| `mask_openmrs_sensitive_data.sql` | **OpenMRS** | Masks patient identifiers, names, addresses, phone numbers, and other personally identifiable information (PII) in the OpenMRS database. 
| -| `mask_openelis_sensitive_data.sql` | **OpenELIS** | Masks patient-related data in the laboratory information system, such as names and identifiers, ensuring test results remain linked but anonymised. | -| `mask_odoo16_sensitive_data.sql` | **Odoo 16** | Masks customer, supplier, and employee details (like names, emails, contact numbers, and addresses) in the Odoo 16 ERP system used in Bahmni. | +These SQL scripts are designed to **mask or anonymise sensitive data** across Bahmni modules — **OpenMRS**, **OpenELIS**, and **Odoo (10 & 16)** — enabling safe use of production-like datasets in **staging, demo, or training environments** while remaining privacy-compliant. + +--- + +## ⚙️ How to Execute + +### 1) OpenMRS Database + +Copy the script to the `openmrsdb` container and run: + +```bash +docker compose exec -it openmrsdb sh +mysql -uroot -padminAdmin!123 openmrs < /mask_openmrs_sensitive_data.sql +``` + +### 2) OpenELIS Database + +Copy the script to the `openelisdb` container and run: + +```bash +docker compose exec -it openelisdb sh +psql -U clinlims -d clinlims < /mask_openelis_sensitive_data.sql +``` + +### 3) Odoo Database (10 & 16) + +Copy the script to the `odoodb` container and run the respective version: + +**Odoo 16:** + +```bash +docker compose exec -it odoodb sh +psql -U odoo -d odoo < /mask_odoo16_sensitive_data.sql +``` + +**Odoo 10:** + +```bash +docker compose exec -it odoodb sh +psql -U odoo -d odoo < /mask_odoo10_sensitive_data.sql +``` + +--- + +## 📌 Overview of Scripts + +| Script Name | Application | Purpose | +| ---------------------------------- | ------------ | --------------------------------------------------------------------------------------------- | +| `mask_openmrs_sensitive_data.sql` | **OpenMRS** | Masks patient identifiers, names, addresses, phone numbers, and other PII. | +| `mask_openelis_sensitive_data.sql` | **OpenELIS** | Anonymises patient-related data while preserving test result links. 
| +| `mask_odoo16_sensitive_data.sql` | **Odoo 16** | Masks customer, supplier, and employee information (names, emails, phone numbers, addresses). | +| `mask_odoo10_sensitive_data.sql` | **Odoo 10** | Masks customer, supplier, and employee information in Odoo 10. | --- ## 🧠 Purpose -These scripts help organisations using Bahmni to: -- Use **realistic datasets** from production for testing or training. -- Ensure **no personally identifiable data (PII)** is exposed in staging, demo, or shared environments. -- Comply with **data protection and privacy regulations**. +These scripts allow organizations using Bahmni to: + +* Work with **realistic datasets** for testing, development, or training. +* Ensure **no personally identifiable information (PII)** is exposed in non-production environments. +* Comply with **data protection and privacy regulations**. --- + diff --git a/anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql new file mode 100644 index 0000000..fadf430 --- /dev/null +++ b/anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql @@ -0,0 +1,99 @@ +\set ON_ERROR_STOP true + +-- =============================== +-- TRUNCATE TABLES +-- =============================== +TRUNCATE failed_events; +TRUNCATE mail_message CASCADE; +TRUNCATE mail_followers CASCADE; + +-- =============================== +-- MASK ADDRESSES +-- =============================== +UPDATE res_partner +SET street = concat('address1-', id), + street2 = concat('address2-', id), + city = concat('address3-', id); + +-- =============================== +-- MASK USERS (EXCLUDE admin, emrsync) +-- =============================== +UPDATE res_users +SET login = concat('user-', id) +WHERE login NOT IN ('admin', 'emrsync'); + +-- Set everyone's password as 'password' (hashed) +UPDATE res_users +SET password = '$1$lw8k34ec$xOY5xkPtTgTw/gN6nHiZq.' 
+WHERE login NOT IN ('admin', 'emrsync'); + +-- =============================== +-- MASK EVENT RECORDS (every customerId occurrence, hence the 'g' flag) +-- =============================== +UPDATE event_records +SET object = regexp_replace(object, '"customerId": "(.+?)"', '"customerId": "XXX123456"', 'g') +WHERE object::text LIKE '%customerId%'; + +-- =============================== +-- MASK MAIL ALIAS +-- =============================== +UPDATE mail_alias +SET alias_name = concat('alias-', id); + +-- =============================== +-- MASK PARTNER NAMES AND CONTACT DETAILS (all partners; email/phone/mobile are cleared) +-- =============================== +UPDATE res_partner +SET name = concat('User ', id), +display_name = concat('User ', id), email = NULL, phone = NULL, mobile = NULL; + +-- =============================== +-- MASK PROVIDER NAME IN SALE ORDER +-- =============================== +UPDATE sale_order +SET provider_name = 'Provider-' || id; + +-- =============================== +-- REMOVE PARTNER IMAGES (stored in ir_attachment) +-- =============================== +UPDATE ir_attachment +SET db_datas = NULL +WHERE res_model = 'res.partner' AND res_field LIKE 'image%'; + +-- =============================== +-- SELECT QUERIES TO VERIFY CHANGES +-- =============================== + +-- Verify partner addresses +SELECT id, street, street2, city +FROM res_partner +LIMIT 5; + +-- Verify masked users +SELECT id, login, password +FROM res_users +WHERE login NOT IN ('admin', 'emrsync') +LIMIT 5; + +-- Verify event records masking +SELECT id, object +FROM event_records +WHERE object::text LIKE '%XXX123456%' +LIMIT 5; + +-- Verify mail_alias +SELECT id, alias_name +FROM mail_alias +LIMIT 5; + +-- Verify partner names and cleared contact details +SELECT id, name, display_name, email, phone, mobile +FROM res_partner +LIMIT 5; + +-- Verify partner images removed +SELECT id, name, res_field, db_datas IS NULL AS image_removed +FROM ir_attachment +WHERE res_model = 'res.partner' AND res_field LIKE 'image%' +LIMIT 5; + diff --git a/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql index 73223e3..61f7532 100644 
--- a/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql +++ b/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql @@ -45,7 +45,14 @@ SET alias_name = concat('alias-', id); -- MASK PARTNER NAMES -- =============================== UPDATE res_partner -SET name = concat('User ', id); +SET name = concat('User ', id), +display_name = concat('User ', id); + +-- =============================== +-- MASK PROVIDER NAME IN SALE ORDER +-- =============================== +UPDATE sale_order +SET provider_name = 'Provider-' || id; -- =============================== -- REMOVE PARTNER IMAGES (stored in ir_attachment) diff --git a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql index b10d87f..d1505f7 100644 --- a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql +++ b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql @@ -14,87 +14,167 @@ TRUNCATE TABLE failed_events; SET FOREIGN_KEY_CHECKS=1; -- 2️⃣ Mask person names -UPDATE person_name -SET given_name = CONCAT('Patient', person_id), - middle_name = CONCAT('Patient', person_id), - family_name = CONCAT('Patient', person_id); +DROP TABLE IF EXISTS random_names; + +CREATE TABLE random_names ( + rid INT NOT NULL AUTO_INCREMENT, + name VARCHAR(255) NOT NULL, + PRIMARY KEY (rid), + UNIQUE KEY name (name) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +INSERT IGNORE INTO random_names (name) +SELECT DISTINCT TRIM(given_name) FROM person_name WHERE given_name IS NOT NULL; +INSERT IGNORE INTO random_names (name) +SELECT DISTINCT TRIM(middle_name) FROM person_name WHERE middle_name IS NOT NULL; +INSERT IGNORE INTO random_names (name) +SELECT DISTINCT TRIM(family_name) FROM person_name WHERE family_name IS NOT NULL; + +DROP PROCEDURE IF EXISTS randomize_names; +DELIMITER // +CREATE PROCEDURE randomize_names() +BEGIN + DECLARE _size INT; + DECLARE _start INT DEFAULT 0; + DECLARE _stepsize INT 
DEFAULT 300; + + SELECT MAX(person_name_id) INTO _size FROM person_name; + + WHILE _start < _size DO + UPDATE person_name + SET given_name = ( + SELECT name FROM ( + SELECT rn.rid, rn.name + FROM random_names rn + ORDER BY RAND() + LIMIT 1 + ) tmp + ), + middle_name = given_name, + family_name = middle_name + WHERE person_name_id BETWEEN _start AND (_start + _stepsize); + + SET _start = _start + _stepsize + 1; + END WHILE; +END; +// +DELIMITER ; + +CALL randomize_names(); +DROP PROCEDURE IF EXISTS randomize_names; + +-- 3️⃣ Mask birthdates +UPDATE person +SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*182-182) DAY) +WHERE birthdate IS NOT NULL AND DATEDIFF(NOW(), birthdate) > 15*365; --- Verify the changes in person_name -SELECT person_id, given_name, middle_name, family_name -FROM person_name -WHERE given_name LIKE 'Patient%' LIMIT 5; +UPDATE person +SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*91-91) DAY) +WHERE birthdate IS NOT NULL AND DATEDIFF(NOW(), birthdate) BETWEEN 5*365 AND 15*365; --- 3️⃣ Mask birthdates (simple +/- 90 days) UPDATE person -SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*180-90) DAY) -WHERE birthdate IS NOT NULL; +SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*30-30) DAY) +WHERE birthdate IS NOT NULL AND DATEDIFF(NOW(), birthdate) < 5*365; --- Verify the changes in person birthdate -SELECT person_id, birthdate -FROM person -WHERE birthdate IS NOT NULL LIMIT 5; +UPDATE person SET birthdate_estimated = CAST(RAND() AS SIGNED); --- 4️⃣ Mask death dates (simple +/- 90 days) +-- 4️⃣ Mask death dates UPDATE person -SET death_date = DATE_ADD(death_date, INTERVAL FLOOR(RAND()*180-90) DAY) +SET death_date = DATE_ADD(death_date, INTERVAL FLOOR(RAND()*91-91) DAY) WHERE death_date IS NOT NULL; --- Verify the changes in person death_date -SELECT person_id, death_date -FROM person -WHERE death_date IS NOT NULL LIMIT 5; - --- 5️⃣ Mask addresses +-- 5️⃣ Mask person addresses UPDATE person_address -SET address1 
= CONCAT('Address1-', person_id), - address2 = CONCAT('Address2-', person_id), +SET address1 = CONCAT('address1-', person_id), + address2 = CONCAT('address2-', person_id), + city_village = CONCAT('city-', person_id), -- Mask city_village latitude = NULL, - longitude = NULL; + longitude = NULL, + date_created = NOW(), + date_voided = NOW(); --- Verify the changes in person_address -SELECT person_id, address1, address2, latitude, longitude -FROM person_address -WHERE address1 LIKE 'Address1-%' LIMIT 5; -- 6️⃣ Mask locations UPDATE location SET name = CONCAT('Location-', location_id); --- Verify the changes in location -SELECT location_id, name -FROM location -WHERE name LIKE 'Location-%' LIMIT 5; +-- 7️⃣ Mask users +UPDATE users +SET username = CONCAT('username-', user_id) +WHERE username NOT IN ('admin', 'superman', 'superuser'); --- 7️⃣ Mask usernames and passwords UPDATE users -SET username = CONCAT('username-', user_id), - password = 'dummy_password' +SET password = '4a1750c8607dfa237de36c6305715c223415189', + salt = 'c788c6ad82a157b712392ca695dfcf2eed193d7f', + secret_question = NULL, + secret_answer = NULL WHERE username NOT IN ('admin', 'superman', 'superuser'); --- Verify the changes in users -SELECT user_id, username, password -FROM users -WHERE username NOT IN ('admin', 'superman', 'superuser') LIMIT 5; +UPDATE global_property +SET property_value = 'admin' +WHERE property LIKE '%.username'; + +UPDATE global_property +SET property_value = 'test' +WHERE property LIKE '%.password'; + +-- 8️⃣ Mask patient identifiers +DROP TABLE IF EXISTS temp_patient_identifier_old; -- <-- add this line +CREATE TABLE temp_patient_identifier_old(patient_id INT, identifier VARCHAR(256), PRIMARY KEY(patient_id)); +INSERT INTO temp_patient_identifier_old SELECT patient_id, identifier FROM patient_identifier; + --- 8️⃣ Mask person attributes + +TRUNCATE patient_identifier; + +INSERT INTO patient_identifier (patient_id, identifier, identifier_type, location_id, preferred, creator, 
date_created, voided, uuid) +SELECT p.patient_id, + CONCAT((SELECT prefix FROM idgen_seq_id_gen ORDER BY RAND() LIMIT 1), p.patient_id), + (SELECT patient_identifier_type_id FROM patient_identifier_type WHERE name = 'Bahmni Id'), + 1, 1, 1, '20080101', 0, UUID() +FROM patient p; + +CREATE TABLE temp_person_uuid_old(person_id INT, uuid VARCHAR(256), PRIMARY KEY(person_id)); +INSERT INTO temp_person_uuid_old SELECT person_id, uuid FROM person; + +-- 9️⃣ Mask person attributes UPDATE person_attribute pa -JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id -SET pa.value = CONCAT(pat.name, '-', pa.person_id) -WHERE pat.name IN ('primaryRelative', 'primaryContact'); +INNER JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +SET pa.value = CONCAT('primaryRelative-', pa.person_id) +WHERE pat.name = 'primaryRelative'; --- Verify the changes in person_attribute -SELECT pa.person_id, pa.value, pat.name -FROM person_attribute pa -JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id -WHERE pat.name IN ('primaryRelative', 'primaryContact') LIMIT 5; +UPDATE person_attribute pa +INNER JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +SET pa.value = FLOOR(POW(10,9) + RAND() * (POW(10,10) - POW(10,9))) +WHERE pat.name = 'primaryContact'; --- 9️⃣ Clear sensitive local names -DELETE pa FROM person_attribute pa -JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +DELETE pa +FROM person_attribute pa +INNER JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal'); --- Verify that the sensitive local names were deleted -SELECT person_id, pa.value, pat.name -FROM person_attribute pa -JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id 
-WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal') LIMIT 5; +-- 🔟 Mask visit, encounter, obs dates +ALTER TABLE visit ADD COLUMN rand_increment INT; + +UPDATE visit +SET rand_increment = CAST(RAND()*91-91 AS SIGNED), + date_started = ADDDATE(date_started, rand_increment), + date_stopped = IF(date_stopped IS NULL, NULL, ADDDATE(date_stopped, rand_increment)), + date_voided = IF(date_voided IS NULL, NULL, ADDDATE(date_voided, rand_increment)), + date_created = ADDDATE(date_created, rand_increment); + +UPDATE encounter e +JOIN visit v ON e.visit_id = v.visit_id +SET e.encounter_datetime = ADDDATE(e.encounter_datetime, v.rand_increment), + e.date_voided = IF(e.date_voided IS NULL, NULL, ADDDATE(e.date_voided, v.rand_increment)), + e.date_created = ADDDATE(e.date_created, v.rand_increment); + +UPDATE obs o +JOIN encounter e ON e.encounter_id = o.encounter_id +JOIN visit v ON e.visit_id = v.visit_id +SET o.obs_datetime = ADDDATE(o.obs_datetime, v.rand_increment), + o.date_created = ADDDATE(o.date_created, v.rand_increment), + o.date_voided = IF(o.date_voided IS NULL, NULL, ADDDATE(o.date_voided, v.rand_increment)), + o.value_datetime = IF(o.value_datetime IS NULL, NULL, ADDDATE(o.value_datetime, v.rand_increment)); + +ALTER TABLE visit DROP COLUMN rand_increment; From 9e06900364eb0d50533a8f956ec3e819392d95ac Mon Sep 17 00:00:00 2001 From: Akhil Anto Tharayil Date: Tue, 4 Nov 2025 17:33:04 +0530 Subject: [PATCH 3/4] update script --- .../mask_openmrs_sensitive_data.sql | 193 ++++++------------ 1 file changed, 61 insertions(+), 132 deletions(-) diff --git a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql index d1505f7..6dda93a 100644 --- a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql +++ b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql @@ -14,74 +14,39 @@ TRUNCATE TABLE failed_events; SET FOREIGN_KEY_CHECKS=1; -- 2️⃣ 
Mask person names -DROP TABLE IF EXISTS random_names; - -CREATE TABLE random_names ( - rid INT NOT NULL AUTO_INCREMENT, - name VARCHAR(255) NOT NULL, - PRIMARY KEY (rid), - UNIQUE KEY name (name) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; - -INSERT IGNORE INTO random_names (name) -SELECT DISTINCT TRIM(given_name) FROM person_name WHERE given_name IS NOT NULL; -INSERT IGNORE INTO random_names (name) -SELECT DISTINCT TRIM(middle_name) FROM person_name WHERE middle_name IS NOT NULL; -INSERT IGNORE INTO random_names (name) -SELECT DISTINCT TRIM(family_name) FROM person_name WHERE family_name IS NOT NULL; - -DROP PROCEDURE IF EXISTS randomize_names; -DELIMITER // -CREATE PROCEDURE randomize_names() -BEGIN - DECLARE _size INT; - DECLARE _start INT DEFAULT 0; - DECLARE _stepsize INT DEFAULT 300; - - SELECT MAX(person_name_id) INTO _size FROM person_name; - - WHILE _start < _size DO - UPDATE person_name - SET given_name = ( - SELECT name FROM ( - SELECT rn.rid, rn.name - FROM random_names rn - ORDER BY RAND() - LIMIT 1 - ) tmp - ), - middle_name = given_name, - family_name = middle_name - WHERE person_name_id BETWEEN _start AND (_start + _stepsize); - - SET _start = _start + _stepsize + 1; - END WHILE; -END; -// -DELIMITER ; - -CALL randomize_names(); -DROP PROCEDURE IF EXISTS randomize_names; - --- 3️⃣ Mask birthdates +UPDATE person_name +SET given_name = CONCAT('Patient', person_id), + middle_name = CONCAT('Patient', person_id), + family_name = CONCAT('Patient', person_id) +WHERE person_id NOT IN ( + SELECT person_id FROM users WHERE username IN ('admin','superman','superuser') +); + +-- Verify the changes in person_name +SELECT person_id, given_name, middle_name, family_name +FROM person_name +WHERE given_name LIKE 'Patient%' LIMIT 5; + +-- 3️⃣ Mask birthdates (simple +/- 90 days) UPDATE person -SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*182-182) DAY) -WHERE birthdate IS NOT NULL AND DATEDIFF(NOW(), birthdate) > 15*365; +SET birthdate = DATE_ADD(birthdate, 
INTERVAL FLOOR(RAND()*180-90) DAY) +WHERE birthdate IS NOT NULL; -UPDATE person -SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*91-91) DAY) -WHERE birthdate IS NOT NULL AND DATEDIFF(NOW(), birthdate) BETWEEN 5*365 AND 15*365; +-- Verify the changes in person birthdate +SELECT person_id, birthdate +FROM person +WHERE birthdate IS NOT NULL LIMIT 5; +-- 4️⃣ Mask death dates (simple +/- 90 days) UPDATE person -SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*30-30) DAY) -WHERE birthdate IS NOT NULL AND DATEDIFF(NOW(), birthdate) < 5*365; +SET death_date = DATE_ADD(death_date, INTERVAL FLOOR(RAND()*180-90) DAY) +WHERE death_date IS NOT NULL; -UPDATE person SET birthdate_estimated = CAST(RAND() AS SIGNED); +-- Verify the changes in person death_date +SELECT person_id, death_date +FROM person +WHERE death_date IS NOT NULL LIMIT 5; --- 4️⃣ Mask death dates -UPDATE person -SET death_date = DATE_ADD(death_date, INTERVAL FLOOR(RAND()*91-91) DAY) -WHERE death_date IS NOT NULL; -- 5️⃣ Mask person addresses UPDATE person_address @@ -94,87 +59,51 @@ SET address1 = CONCAT('address1-', person_id), date_voided = NOW(); + +-- Verify the changes in person_address (prefix matches the lowercase value written by the UPDATE) +SELECT person_id, address1, address2, city_village, latitude, longitude +FROM person_address +WHERE address1 LIKE 'address1-%' LIMIT 5; + -- 6️⃣ Mask locations UPDATE location SET name = CONCAT('Location-', location_id); --- 7️⃣ Mask users -UPDATE users -SET username = CONCAT('username-', user_id) -WHERE username NOT IN ('admin', 'superman', 'superuser'); +-- Verify the changes in location +SELECT location_id, name +FROM location +WHERE name LIKE 'Location-%' LIMIT 5; +-- 7️⃣ Mask usernames and passwords; also clear the security question/answer UPDATE users -SET password = '4a1750c8607dfa237de36c6305715c223415189', - salt = 'c788c6ad82a157b712392ca695dfcf2eed193d7f', - secret_question = NULL, - secret_answer = NULL +SET username = CONCAT('username-', user_id), + password = 'dummy_password', secret_question = NULL, secret_answer = NULL WHERE username NOT IN ('admin', 'superman', 'superuser'); -UPDATE 
global_property -SET property_value = 'admin' -WHERE property LIKE '%.username'; - -UPDATE global_property -SET property_value = 'test' -WHERE property LIKE '%.password'; - --- 8️⃣ Mask patient identifiers -DROP TABLE IF EXISTS temp_patient_identifier_old; -- <-- add this line -CREATE TABLE temp_patient_identifier_old(patient_id INT, identifier VARCHAR(256), PRIMARY KEY(patient_id)); -INSERT INTO temp_patient_identifier_old SELECT patient_id, identifier FROM patient_identifier; +-- Verify the changes in users +SELECT user_id, username, password +FROM users +WHERE username NOT IN ('admin', 'superman', 'superuser') LIMIT 5; - - -TRUNCATE patient_identifier; - -INSERT INTO patient_identifier (patient_id, identifier, identifier_type, location_id, preferred, creator, date_created, voided, uuid) -SELECT p.patient_id, - CONCAT((SELECT prefix FROM idgen_seq_id_gen ORDER BY RAND() LIMIT 1), p.patient_id), - (SELECT patient_identifier_type_id FROM patient_identifier_type WHERE name = 'Bahmni Id'), - 1, 1, 1, '20080101', 0, UUID() -FROM patient p; - -CREATE TABLE temp_person_uuid_old(person_id INT, uuid VARCHAR(256), PRIMARY KEY(person_id)); -INSERT INTO temp_person_uuid_old SELECT person_id, uuid FROM person; - --- 9️⃣ Mask person attributes +-- 8️⃣ Mask person attributes UPDATE person_attribute pa -INNER JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id -SET pa.value = CONCAT('primaryRelative-', pa.person_id) -WHERE pat.name = 'primaryRelative'; +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +SET pa.value = CONCAT(pat.name, '-', pa.person_id) +WHERE pat.name IN ('primaryRelative', 'primaryContact'); -UPDATE person_attribute pa -INNER JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id -SET pa.value = FLOOR(POW(10,9) + RAND() * (POW(10,10) - POW(10,9))) -WHERE pat.name = 'primaryContact'; - -DELETE pa +-- Verify the changes in person_attribute 
+SELECT pa.person_id, pa.value, pat.name FROM person_attribute pa -INNER JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id +WHERE pat.name IN ('primaryRelative', 'primaryContact') LIMIT 5; + +-- 9️⃣ Clear sensitive local names +DELETE pa FROM person_attribute pa +JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal'); --- 🔟 Mask visit, encounter, obs dates -ALTER TABLE visit ADD COLUMN rand_increment INT; - -UPDATE visit -SET rand_increment = CAST(RAND()*91-91 AS SIGNED), - date_started = ADDDATE(date_started, rand_increment), - date_stopped = IF(date_stopped IS NULL, NULL, ADDDATE(date_stopped, rand_increment)), - date_voided = IF(date_voided IS NULL, NULL, ADDDATE(date_voided, rand_increment)), - date_created = ADDDATE(date_created, rand_increment); - -UPDATE encounter e -JOIN visit v ON e.visit_id = v.visit_id -SET e.encounter_datetime = ADDDATE(e.encounter_datetime, v.rand_increment), - e.date_voided = IF(e.date_voided IS NULL, NULL, ADDDATE(e.date_voided, v.rand_increment)), - e.date_created = ADDDATE(e.date_created, v.rand_increment); - -UPDATE obs o -JOIN encounter e ON e.encounter_id = o.encounter_id -JOIN visit v ON e.visit_id = v.visit_id -SET o.obs_datetime = ADDDATE(o.obs_datetime, v.rand_increment), - o.date_created = ADDDATE(o.date_created, v.rand_increment), - o.date_voided = IF(o.date_voided IS NULL, NULL, ADDDATE(o.date_voided, v.rand_increment)), - o.value_datetime = IF(o.value_datetime IS NULL, NULL, ADDDATE(o.value_datetime, v.rand_increment)); - -ALTER TABLE visit DROP COLUMN rand_increment; +-- Verify that the sensitive local names were deleted +SELECT person_id, pa.value, pat.name +FROM person_attribute pa +JOIN person_attribute_type pat ON pa.person_attribute_type_id = 
pat.person_attribute_type_id +WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal') LIMIT 5; From 386432a9b86818147495ab8af9e90b9a8eed77fa Mon Sep 17 00:00:00 2001 From: Akhil Anto Tharayil Date: Tue, 4 Nov 2025 18:02:57 +0530 Subject: [PATCH 4/4] changes inscript --- .../mask_openmrs_sensitive_data.sql | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql index 6dda93a..c4d8e7d 100644 --- a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql +++ b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql @@ -14,13 +14,22 @@ TRUNCATE TABLE failed_events; SET FOREIGN_KEY_CHECKS=1; -- 2️⃣ Mask person names -UPDATE person_name -SET given_name = CONCAT('Patient', person_id), - middle_name = CONCAT('Patient', person_id), - family_name = CONCAT('Patient', person_id) -WHERE person_id NOT IN ( - SELECT person_id FROM users WHERE username IN ('admin','superman','superuser') -); +-- 2️⃣ Mask person names with alphabetic suffixes +SET @counter := 0; + +UPDATE person_name pn +JOIN ( + SELECT person_id, CHAR(65 + (@counter := @counter + 1) % 26) AS suffix + FROM person_name + WHERE person_id NOT IN ( + SELECT person_id FROM users WHERE username IN ('admin','superman','superuser') + ) + ORDER BY person_id +) AS tmp ON pn.person_id = tmp.person_id +SET + pn.given_name = CONCAT('Patient', tmp.suffix), + pn.middle_name = CONCAT('Patient', tmp.suffix), + pn.family_name = CONCAT('Patient', tmp.suffix); -- Verify the changes in person_name SELECT person_id, given_name, middle_name, family_name