From b416346ea610bfd81b5e86dd3468c488ea3997a5 Mon Sep 17 00:00:00 2001 From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com> Date: Sat, 22 Nov 2025 19:29:00 +0530 Subject: [PATCH 1/4] Fix XML formatting in pom.xml --- pom.xml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 97a74ae..8f876b9 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,8 @@ 11 3.3.4 2.4.15 - 6.1.0-mapr + 6.1.0-mapr + @@ -26,6 +27,13 @@ provided + + org.apache.hbase + hbase-mapreduce + ${hbase.version} + provided + + org.apache.hadoop hadoop-mapreduce-client-core From 0b083b2225efc1f56d586a7050686bc46eda324d Mon Sep 17 00:00:00 2001 From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com> Date: Sat, 22 Nov 2025 19:30:45 +0530 Subject: [PATCH 2/4] Refactor map method to include TableMapper context --- .../java/com/mapr/health/job1/PHIAnonymizationMapper.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java b/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java index 815f61e..bf8a7d5 100644 --- a/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java +++ b/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java @@ -3,10 +3,8 @@ import com.mapr.health.util.SchemaUtility; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableMapper; +import org.apache.hadoop.hbase.mapreduce.TableMapper; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Mapper; - import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -26,9 +24,10 @@ public class PHIAnonymizationMapper private final Text outputValue = new Text(); @Override - public void map(ImmutableBytesWritable rowKey, Result result, Context context) + public void map(ImmutableBytesWritable rowKey, Result result, TableMapper.Context context) throws IOException, InterruptedException { + // 1. Extract Raw PHI and Metadata byte[] patientNameBytes = result.getValue(SchemaUtility.RAW_PHI_CF, SchemaUtility.PATIENT_NAME_COL); byte[] visitDateBytes = result.getValue(SchemaUtility.METADATA_CF, SchemaUtility.VISIT_DATE_COL); @@ -54,7 +53,6 @@ public void map(ImmutableBytesWritable rowKey, Result result, Context context) // 3. Prepare output for temporary MapR-FS location // Key: Hashed Patient ID // Value: All non-PHI/Anonymized data that needs to be retained (e.g., VISIT_DATE) - outputKey.set(anonymizedId); outputValue.set(visitDate); From 724bfd49a448edc1438e733fcb2e2b57d7168bf6 Mon Sep 17 00:00:00 2001 From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com> Date: Sat, 22 Nov 2025 19:31:31 +0530 Subject: [PATCH 3/4] Fix method signature in ArchivalTagReducer --- src/main/java/com/mapr/health/job2/ArchivalTagReducer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java b/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java index 8ba9275..8b58ac6 100644 --- a/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java +++ b/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java @@ -3,7 +3,7 @@ import com.mapr.health.util.SchemaUtility; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableReducer; +import org.apache.hadoop.hbase.mapreduce.TableReducer; import org.apache.hadoop.io.Text; import java.io.IOException; @@ -17,7 +17,7 @@ public class ArchivalTagReducer extends TableReducer { @Override - public void reduce(Text key, Iterable values, Context context) + public void reduce(Text key, Iterable values, TableReducer.Context context) throws IOException, InterruptedException { // The key is the final RowKey: AnonymizedID_VisitDate From 73d19bea930d222857e58b415d909d4b3e7f0cee Mon Sep 17 00:00:00 2001 From: Ramkumar Ethiraj <64294490+ramethiraj@users.noreply.github.com> Date: Sat, 22 Nov 2025 19:32:23 +0530 Subject: [PATCH 4/4] Add import for TableMapReduceUtil in AnonymizationPipelineDriver --- .../java/com/mapr/health/driver/AnonymizationPipelineDriver.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java b/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java index bfbbfed..9de523a 100644 --- a/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java +++ b/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java @@ -18,6 +18,7 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; public class AnonymizationPipelineDriver extends Configured implements Tool {