diff --git a/anonymiseScripts/anonymise-standard/README.md b/anonymiseScripts/anonymise-standard/README.md
new file mode 100644
index 0000000..4d9bc46
--- /dev/null
+++ b/anonymiseScripts/anonymise-standard/README.md
@@ -0,0 +1,70 @@
+
+
+# SQL Scripts for Data Anonymisation
+
+These SQL scripts are designed to **mask or anonymise sensitive data** across Bahmni modules — **OpenMRS**, **OpenELIS**, and **Odoo (10 & 16)** — enabling safe use of production-like datasets in **staging, demo, or training environments** while remaining privacy-compliant.
+
+---
+
+## ⚙️ How to Execute
+
+### 1) OpenMRS Database
+
+Copy the script to the `openmrsdb` container and run:
+
+```bash
+docker compose exec -it openmrsdb sh
+mysql -uroot -padminAdmin!123 openmrs < /mask_openmrs_sensitive_data.sql
+```
+
+### 2) OpenELIS Database
+
+Copy the script to the `openelisdb` container and run:
+
+```bash
+docker compose exec -it openelisdb sh
+psql -U clinlims -d clinlims < /mask_openelis_sensitive_data.sql
+```
+
+### 3) Odoo Database (10 & 16)
+
+Copy the script to the `odoodb` container and run the respective version:
+
+**Odoo 16:**
+
+```bash
+docker compose exec -it odoodb sh
+psql -U odoo -d odoo < /mask_odoo16_sensitive_data.sql
+```
+
+**Odoo 10:**
+
+```bash
+docker compose exec -it odoodb sh
+psql -U odoo -d odoo < /mask_odoo10_sensitive_data.sql
+```
+
+---
+
+## 📌 Overview of Scripts
+
+| Script Name | Application | Purpose |
+| ---------------------------------- | ------------ | --------------------------------------------------------------------------------------------- |
+| `mask_openmrs_sensitive_data.sql` | **OpenMRS** | Masks person names, addresses, location names, usernames/passwords, and selected person attributes; shifts birth and death dates by up to 90 days. |
+| `mask_openelis_sensitive_data.sql` | **OpenELIS** | Masks organization addresses, patient identity data, and user logins, names, and passwords; clears failed events. |
+| `mask_odoo16_sensitive_data.sql` | **Odoo 16** | Masks partner (customer/supplier) names, emails, phone numbers, and addresses, plus user logins, passwords, mail aliases, and sale-order provider names. |
+| `mask_odoo10_sensitive_data.sql` | **Odoo 10** | Same masking as the Odoo 16 script, for the Odoo 10 schema. |
+
+---
+
+## 🧠 Purpose
+
+These scripts allow organizations using Bahmni to:
+
+* Work with **realistic datasets** for testing, development, or training.
+* Ensure **no personally identifiable information (PII)** is exposed in non-production environments.
+* Comply with **data protection and privacy regulations**.
+
+---
+
+
diff --git a/anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql
new file mode 100644
index 0000000..fadf430
--- /dev/null
+++ b/anonymiseScripts/anonymise-standard/mask_odoo10_sensitive_data.sql
@@ -0,0 +1,107 @@
+\set ON_ERROR_STOP true
+
+-- ===============================
+-- TRUNCATE TABLES
+-- ===============================
+TRUNCATE failed_events;
+TRUNCATE mail_message CASCADE;
+TRUNCATE mail_followers CASCADE;
+
+-- ===============================
+-- MASK ADDRESSES (every partner row, intentionally unfiltered)
+-- ===============================
+UPDATE res_partner
+SET street = concat('address1-', id),
+    street2 = concat('address2-', id),
+    city = concat('address3-', id);
+
+-- ===============================
+-- MASK CONTACT DETAILS (NULL values are left as NULL)
+-- ===============================
+UPDATE res_partner
+SET email = CASE WHEN email IS NULL THEN NULL ELSE concat('user-', id, '@example.com') END,
+    phone = CASE WHEN phone IS NULL THEN NULL ELSE concat('phone-', id) END,
+    mobile = CASE WHEN mobile IS NULL THEN NULL ELSE concat('mobile-', id) END;
+
+-- ===============================
+-- MASK USERS (EXCLUDE admin, emrsync)
+-- ===============================
+UPDATE res_users
+SET login = concat('user-', id)
+WHERE login NOT IN ('admin', 'emrsync');
+
+-- Set every masked user's password to the hash of 'password' (admin and emrsync keep theirs)
+UPDATE res_users
+SET password = '$1$lw8k34ec$xOY5xkPtTgTw/gN6nHiZq.'
+WHERE login NOT IN ('admin', 'emrsync');
+
+-- ===============================
+-- MASK EVENT RECORDS ('g' masks every customerId in a row, not just the first)
+-- ===============================
+UPDATE event_records
+SET object = regexp_replace(object, '"customerId": "[^"]*"', '"customerId": "XXX123456"', 'g')
+WHERE object::text LIKE '%customerId%';
+
+-- ===============================
+-- MASK MAIL ALIAS
+-- ===============================
+UPDATE mail_alias
+SET alias_name = concat('alias-', id);
+
+-- ===============================
+-- MASK PARTNER NAMES
+-- ===============================
+UPDATE res_partner
+SET name = concat('User ', id),
+    display_name = concat('User ', id);
+
+-- ===============================
+-- MASK PROVIDER NAME IN SALE ORDER
+-- ===============================
+UPDATE sale_order
+SET provider_name = 'Provider-' || id;
+
+-- ===============================
+-- REMOVE PARTNER IMAGES (stored in ir_attachment)
+-- ===============================
+UPDATE ir_attachment
+SET db_datas = NULL
+WHERE res_model = 'res.partner' AND res_field LIKE 'image%';
+
+-- ===============================
+-- SELECT QUERIES TO VERIFY CHANGES
+-- ===============================
+
+-- Verify partner addresses
+SELECT id, street, street2, city
+FROM res_partner
+LIMIT 5;
+
+-- Verify masked users
+SELECT id, login, password
+FROM res_users
+WHERE login NOT IN ('admin', 'emrsync')
+LIMIT 5;
+
+-- Verify event records masking
+SELECT id, object
+FROM event_records
+WHERE object::text LIKE '%XXX123456%'
+LIMIT 5;
+
+-- Verify mail_alias
+SELECT id, alias_name
+FROM mail_alias
+LIMIT 5;
+
+-- Verify partner names and contact details
+SELECT id, name, email, phone, mobile
+FROM res_partner
+LIMIT 5;
+
+-- Verify partner images removed
+SELECT id, name, res_field, db_datas IS NULL AS image_removed
+FROM ir_attachment
+WHERE res_model = 'res.partner' AND res_field LIKE 'image%'
+LIMIT 5;
+
diff --git a/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql
new file mode 100644
index 0000000..61f7532
--- /dev/null
+++ b/anonymiseScripts/anonymise-standard/mask_odoo16_sensitive_data.sql
@@ -0,0 +1,107 @@
+\set ON_ERROR_STOP true
+
+-- ===============================
+-- TRUNCATE TABLES
+-- ===============================
+TRUNCATE res_partner_attributes;
+TRUNCATE failed_events;
+TRUNCATE mail_message CASCADE;
+TRUNCATE mail_followers CASCADE;
+
+-- ===============================
+-- MASK ADDRESSES (every partner row, intentionally unfiltered)
+-- ===============================
+UPDATE res_partner
+SET street = concat('address1-', id),
+    street2 = concat('address2-', id),
+    city = concat('address3-', id);
+
+-- ===============================
+-- MASK CONTACT DETAILS (NULL values are left as NULL)
+-- ===============================
+UPDATE res_partner
+SET email = CASE WHEN email IS NULL THEN NULL ELSE concat('user-', id, '@example.com') END,
+    phone = CASE WHEN phone IS NULL THEN NULL ELSE concat('phone-', id) END,
+    mobile = CASE WHEN mobile IS NULL THEN NULL ELSE concat('mobile-', id) END;
+
+-- ===============================
+-- MASK USERS (EXCLUDE admin, emrsync)
+-- ===============================
+UPDATE res_users
+SET login = concat('user-', id)
+WHERE login NOT IN ('admin', 'emrsync');
+
+-- Set every masked user's password to the hash of 'password' (admin and emrsync keep theirs)
+UPDATE res_users
+SET password = '$1$lw8k34ec$xOY5xkPtTgTw/gN6nHiZq.' -- hashed password placeholder
+WHERE login NOT IN ('admin', 'emrsync');
+
+-- ===============================
+-- MASK EVENT RECORDS ('g' masks every customerId in a row, not just the first)
+-- ===============================
+UPDATE event_records
+SET object = regexp_replace(object, '"customerId": "[^"]*"', '"customerId": "XXX123456"', 'g')
+WHERE object::text LIKE '%customerId%';
+
+-- ===============================
+-- MASK MAIL ALIAS
+-- ===============================
+UPDATE mail_alias
+SET alias_name = concat('alias-', id);
+
+-- ===============================
+-- MASK PARTNER NAMES
+-- ===============================
+UPDATE res_partner
+SET name = concat('User ', id),
+    display_name = concat('User ', id);
+
+-- ===============================
+-- MASK PROVIDER NAME IN SALE ORDER
+-- ===============================
+UPDATE sale_order
+SET provider_name = 'Provider-' || id;
+
+-- ===============================
+-- REMOVE PARTNER IMAGES (stored in ir_attachment)
+-- ===============================
+UPDATE ir_attachment
+SET db_datas = NULL
+WHERE res_model = 'res.partner' AND res_field LIKE 'image%';
+
+-- ===============================
+-- SELECT QUERIES TO VERIFY CHANGES
+-- ===============================
+
+-- Verify partner addresses
+SELECT id, street, street2, city
+FROM res_partner
+LIMIT 5;
+
+-- Verify masked users
+SELECT id, login, password
+FROM res_users
+WHERE login NOT IN ('admin', 'emrsync')
+LIMIT 5;
+
+-- Verify event records masking
+SELECT id, object
+FROM event_records
+WHERE object::text LIKE '%XXX123456%'
+LIMIT 5;
+
+-- Verify mail_alias
+SELECT id, alias_name
+FROM mail_alias
+LIMIT 5;
+
+-- Verify partner names and contact details
+SELECT id, name, email, phone, mobile
+FROM res_partner
+LIMIT 5;
+
+-- Verify partner images removed
+SELECT id, name, res_field, db_datas IS NULL AS image_removed
+FROM ir_attachment
+WHERE res_model = 'res.partner' AND res_field LIKE 'image%'
+LIMIT 5;
diff --git a/anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql
new file mode 100644
index 0000000..747a831
--- /dev/null
+++ b/anonymiseScripts/anonymise-standard/mask_openelis_sensitive_data.sql
@@ -0,0 +1,72 @@
+\set ON_ERROR_STOP true
+
+-- 1️⃣ Clear failed events
+TRUNCATE failed_events;
+-- Verify that the table is empty
+SELECT COUNT(*) AS failed_events_count FROM failed_events;
+
+-- 2️⃣ Mask addresses (organization_address)
+UPDATE organization_address AS oa
+SET value = CONCAT('address-', oa.organization_id);
+
+-- Verify the changes in organization_address
+SELECT *
+FROM organization_address
+LIMIT 5;
+
+-- 3️⃣ Mask patient identities
+UPDATE patient_identity AS pi
+SET identity_data = CONCAT('PRIMARYRELATIVE-', pi.patient_id);
+
+-- Verify the changes in patient_identity
+SELECT *
+FROM patient_identity
+LIMIT 5;
+
+-- 4️⃣ Mask system users login (skip admin and atomfeed users)
+-- Must run before step 5: the join below matches on the original login_name.
+UPDATE system_user su
+SET login_name = CONCAT('user-', lu.id)
+FROM login_user lu
+WHERE lu.login_name = su.login_name
+  AND su.login_name NOT IN ('admin', 'atomfeed');
+
+-- Verify the changes in system_user (excluding admin and atomfeed)
+SELECT id, login_name
+FROM system_user
+WHERE login_name NOT IN ('admin', 'atomfeed') LIMIT 5;
+
+-- 5️⃣ Mask login names and set every masked user's password to adminADMIN!
+-- NOTE(review): the password is written as a literal; confirm OpenELIS accepts it in this form.
+UPDATE login_user
+SET login_name = CONCAT('user-', id),
+    password = 'adminADMIN!'
+WHERE login_name NOT IN ('admin', 'atomfeed');
+
+-- Verify the changes in login_user
+SELECT id, login_name, password
+FROM login_user
+WHERE login_name NOT IN ('admin', 'atomfeed') LIMIT 5;
+
+-- 6️⃣ Fix system_user login for users missing in login_user
+-- NOT EXISTS instead of NOT IN: one NULL login_user.login_name would make NOT IN match no rows.
+UPDATE system_user su
+SET login_name = CONCAT('userwologin-', su.id)
+WHERE NOT EXISTS (SELECT 1 FROM login_user lu WHERE lu.login_name = su.login_name)
+  AND su.login_name NOT IN ('admin', 'atomfeed');
+
+-- Verify the changes in system_user (missing login_user entries)
+SELECT id, login_name
+FROM system_user
+WHERE login_name LIKE 'userwologin-%' LIMIT 5;
+
+-- 7️⃣ Set first_name and last_name = login_name (skip atomfeed)
+UPDATE system_user
+SET first_name = login_name,
+    last_name = login_name
+WHERE login_name NOT IN ('atomfeed');
+
+-- Verify the changes in system_user (first_name and last_name)
+SELECT id, first_name, last_name
+FROM system_user
+WHERE first_name = last_name LIMIT 5;
diff --git a/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql
new file mode 100644
index 0000000..c4d8e7d
--- /dev/null
+++ b/anonymiseScripts/anonymise-standard/mask_openmrs_sensitive_data.sql
@@ -0,0 +1,118 @@
+SET FOREIGN_KEY_CHECKS=0;
+
+-- 1️⃣ Truncate tables not needed for testing/logs
+TRUNCATE TABLE concept_proposal_tag_map;
+TRUNCATE TABLE concept_proposal;
+TRUNCATE TABLE hl7_in_archive;
+TRUNCATE TABLE hl7_in_error;
+TRUNCATE TABLE hl7_in_queue;
+TRUNCATE TABLE user_property;
+TRUNCATE TABLE notification_alert_recipient;
+TRUNCATE TABLE notification_alert;
+TRUNCATE TABLE failed_events;
+
+SET FOREIGN_KEY_CHECKS=1;
+
+-- NOTE(review): step 2 assigns a user variable inside SELECT, which MySQL 8.0 deprecates.
+-- 2️⃣ Mask person names with alphabetic suffixes (cycles through 26 letters; admin accounts skipped)
+SET @counter := 0;
+
+UPDATE person_name pn
+JOIN (
+    SELECT person_id, CHAR(65 + (@counter := @counter + 1) % 26) AS suffix
+    FROM person_name
+    WHERE person_id NOT IN (
+        SELECT person_id FROM users WHERE username IN ('admin','superman','superuser')
+    )
+    ORDER BY person_id
+) AS tmp ON pn.person_id = tmp.person_id
+SET
+    pn.given_name = CONCAT('Patient', tmp.suffix),
+    pn.middle_name = CONCAT('Patient', tmp.suffix),
+    pn.family_name = CONCAT('Patient', tmp.suffix);
+
+-- Verify the changes in person_name
+SELECT person_id, given_name, middle_name, family_name
+FROM person_name
+WHERE given_name LIKE 'Patient%' LIMIT 5;
+
+-- 3️⃣ Mask birthdates (simple +/- 90 days)
+UPDATE person
+SET birthdate = DATE_ADD(birthdate, INTERVAL FLOOR(RAND()*180-90) DAY)
+WHERE birthdate IS NOT NULL;
+
+-- Verify the changes in person birthdate
+SELECT person_id, birthdate
+FROM person
+WHERE birthdate IS NOT NULL LIMIT 5;
+
+-- 4️⃣ Mask death dates (simple +/- 90 days)
+UPDATE person
+SET death_date = DATE_ADD(death_date, INTERVAL FLOOR(RAND()*180-90) DAY)
+WHERE death_date IS NOT NULL;
+
+-- Verify the changes in person death_date
+SELECT person_id, death_date
+FROM person
+WHERE death_date IS NOT NULL LIMIT 5;
+
+
+-- 5️⃣ Mask person addresses (every row, intentionally unfiltered)
+UPDATE person_address
+SET address1 = CONCAT('address1-', person_id),
+    address2 = CONCAT('address2-', person_id),
+    city_village = CONCAT('city-', person_id), -- Mask city_village
+    latitude = NULL,
+    longitude = NULL,
+    date_created = NOW(),
+    date_voided = CASE WHEN date_voided IS NULL THEN NULL ELSE NOW() END; -- do not stamp a void date on rows that had none
+
+
+
+-- Verify the changes in person_address
+SELECT person_id, address1, address2, latitude, longitude
+FROM person_address
+WHERE address1 LIKE 'address1-%' LIMIT 5;
+
+-- 6️⃣ Mask locations
+UPDATE location
+SET name = CONCAT('Location-', location_id);
+
+-- Verify the changes in location
+SELECT location_id, name
+FROM location
+WHERE name LIKE 'Location-%' LIMIT 5;
+
+-- 7️⃣ Mask usernames and passwords
+UPDATE users
+SET username = CONCAT('username-', user_id),
+    password = 'dummy_password' -- NOTE(review): literal placeholder, not a hash; confirm masked accounts are not meant to log in
+WHERE username NOT IN ('admin', 'superman', 'superuser');
+
+-- Verify the changes in users
+SELECT user_id, username, password
+FROM users
+WHERE username NOT IN ('admin', 'superman', 'superuser') LIMIT 5;
+
+-- 8️⃣ Mask person attributes
+UPDATE person_attribute pa
+JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id
+SET pa.value = CONCAT(pat.name, '-', pa.person_id)
+WHERE pat.name IN ('primaryRelative', 'primaryContact');
+
+-- Verify the changes in person_attribute
+SELECT pa.person_id, pa.value, pat.name
+FROM person_attribute pa
+JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id
+WHERE pat.name IN ('primaryRelative', 'primaryContact') LIMIT 5;
+
+-- 9️⃣ Clear sensitive local names
+DELETE pa FROM person_attribute pa
+JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id
+WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal');
+
+-- Verify that the sensitive local names were deleted (expect no rows)
+SELECT pa.person_id, pa.value, pat.name
+FROM person_attribute pa
+JOIN person_attribute_type pat ON pa.person_attribute_type_id = pat.person_attribute_type_id
+WHERE pat.name IN ('givenNameLocal', 'familyNameLocal', 'middleNameLocal') LIMIT 5;