diff --git a/pom.xml b/pom.xml index 97a74ae..8f876b9 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,8 @@ 11 3.3.4 2.4.15 - 6.1.0-mapr + 6.1.0-mapr + @@ -26,6 +27,13 @@ provided + + org.apache.hbase + hbase-mapreduce + ${hbase.version} + provided + + org.apache.hadoop hadoop-mapreduce-client-core diff --git a/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java b/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java index bfbbfed..9de523a 100644 --- a/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java +++ b/src/main/java/com/mapr/health/driver/AnonymizationPipelineDriver.java @@ -18,6 +18,7 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; public class AnonymizationPipelineDriver extends Configured implements Tool { diff --git a/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java b/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java index 815f61e..bf8a7d5 100644 --- a/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java +++ b/src/main/java/com/mapr/health/job1/PHIAnonymizationMapper.java @@ -3,10 +3,8 @@ import com.mapr.health.util.SchemaUtility; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableMapper; +import org.apache.hadoop.hbase.mapreduce.TableMapper; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Mapper; - import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -26,9 +24,10 @@ public class PHIAnonymizationMapper private final Text outputValue = new Text(); @Override - public void map(ImmutableBytesWritable rowKey, Result result, Context context) + public void map(ImmutableBytesWritable rowKey, Result result, TableMapper.Context context) throws IOException, InterruptedException { + // 1. Extract Raw PHI and Metadata byte[] patientNameBytes = result.getValue(SchemaUtility.RAW_PHI_CF, SchemaUtility.PATIENT_NAME_COL); byte[] visitDateBytes = result.getValue(SchemaUtility.METADATA_CF, SchemaUtility.VISIT_DATE_COL); @@ -54,7 +53,6 @@ public void map(ImmutableBytesWritable rowKey, Result result, Context context) // 3. Prepare output for temporary MapR-FS location // Key: Hashed Patient ID // Value: All non-PHI/Anonymized data that needs to be retained (e.g., VISIT_DATE) - outputKey.set(anonymizedId); outputValue.set(visitDate); diff --git a/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java b/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java index 8ba9275..8b58ac6 100644 --- a/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java +++ b/src/main/java/com/mapr/health/job2/ArchivalTagReducer.java @@ -3,7 +3,7 @@ import com.mapr.health.util.SchemaUtility; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableReducer; +import org.apache.hadoop.hbase.mapreduce.TableReducer; import org.apache.hadoop.io.Text; import java.io.IOException; @@ -17,7 +17,7 @@ public class ArchivalTagReducer extends TableReducer { @Override - public void reduce(Text key, Iterable values, Context context) + public void reduce(Text key, Iterable values, TableReducer.Context context) throws IOException, InterruptedException { // The key is the final RowKey: AnonymizedID_VisitDate