From b416346ea610bfd81b5e86dd3468c488ea3997a5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com>
Date: Sat, 22 Nov 2025 19:29:00 +0530
Subject: [PATCH 1/4] Add hbase-mapreduce dependency and fix XML formatting in pom.xml
---
pom.xml | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 97a74ae..8f876b9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,7 +16,8 @@
11
3.3.4
2.4.15
- 6.1.0-mapr
+ 6.1.0-mapr
+
@@ -26,6 +27,13 @@
provided
+
+ org.apache.hbase
+ hbase-mapreduce
+ ${hbase.version}
+ provided
+
+
org.apache.hadoop
hadoop-mapreduce-client-core
From 0b083b2225efc1f56d586a7050686bc46eda324d Mon Sep 17 00:00:00 2001
From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com>
Date: Sat, 22 Nov 2025 19:30:45 +0530
Subject: [PATCH 2/4] Use TableMapper.Context in map() and drop unused Mapper import
---
.../java/com/mapr/health/job1/PHIAnonymizationMapper.java | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java b/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java
index 815f61e..bf8a7d5 100644
--- a/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java
+++ b/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java
@@ -3,10 +3,8 @@
import com.mapr.health.util.SchemaUtility;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -26,9 +24,10 @@ public class PHIAnonymizationMapper
private final Text outputValue = new Text();
@Override
- public void map(ImmutableBytesWritable rowKey, Result result, Context context)
+ public void map(ImmutableBytesWritable rowKey, Result result, TableMapper.Context context)
throws IOException, InterruptedException {
+
// 1. Extract Raw PHI and Metadata
byte[] patientNameBytes = result.getValue(SchemaUtility.RAW_PHI_CF, SchemaUtility.PATIENT_NAME_COL);
byte[] visitDateBytes = result.getValue(SchemaUtility.METADATA_CF, SchemaUtility.VISIT_DATE_COL);
@@ -54,7 +53,6 @@ public void map(ImmutableBytesWritable rowKey, Result result, Context context)
// 3. Prepare output for temporary MapR-FS location
// Key: Hashed Patient ID
// Value: All non-PHI/Anonymized data that needs to be retained (e.g., VISIT_DATE)
-
outputKey.set(anonymizedId);
outputValue.set(visitDate);
From 724bfd49a448edc1438e733fcb2e2b57d7168bf6 Mon Sep 17 00:00:00 2001
From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com>
Date: Sat, 22 Nov 2025 19:31:31 +0530
Subject: [PATCH 3/4] Fix method signature in ArchivalTagReducer
---
src/main/java/com/mapr/health/job2/ArchivalTagReducer.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java b/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java
index 8ba9275..8b58ac6 100644
--- a/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java
+++ b/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java
@@ -3,7 +3,7 @@
import com.mapr.health.util.SchemaUtility;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableReducer;
+import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.Text;
import java.io.IOException;
@@ -17,7 +17,7 @@ public class ArchivalTagReducer
extends TableReducer {
@Override
- public void reduce(Text key, Iterable values, Context context)
+ public void reduce(Text key, Iterable values, TableReducer.Context context)
throws IOException, InterruptedException {
// The key is the final RowKey: AnonymizedID_VisitDate
From 73d19bea930d222857e58b415d909d4b3e7f0cee Mon Sep 17 00:00:00 2001
From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com>
Date: Sat, 22 Nov 2025 19:32:23 +0530
Subject: [PATCH 4/4] Add import for TableMapReduceUtil in
AnonymizationPipelineDriver
---
.../java/com/mapr/health/driver/AnonymizationPipelineDriver.java | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java b/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java
index bfbbfed..9de523a 100644
--- a/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java
+++ b/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java
@@ -18,6 +18,7 @@
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
public class AnonymizationPipelineDriver extends Configured implements Tool {